ref: 5ba95fdb07ddc2c32111a1b2f57f17aa27fcbbf5
parent: fa03455b5057675b18d1c87aef2d1071b2088de0
author: mischief <[email protected]>
date: Tue Jun 24 14:02:25 EDT 2014
import Xen 32-bit paravirtual kernel from /n/sources/xen.
--- /dev/null
+++ b/sys/src/9/xen/archxen.c
@@ -1,0 +1,115 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+static int
+identify(void)
+{
+ m->havepge = 0;
+ return 0;
+}
+
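+/*
+ * Probe cpu/<n>/availability in xenstore to count online virtual CPUs.
+ * SMP startup is not implemented yet, so any extra CPUs are reported
+ * and the count is clamped back to one.
+ */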
+static void
+intrinit(void)
+{
+ ulong machs;
+ int i, ncpu;
+ char *cp;
+ char node[32];
+ char buf[32];
+
+ if((cp = getconf("*nomp")) != nil && strtol(cp, 0, 0) != 0)
+ return;
+ ncpu = MAX_VIRT_CPUS;
+ if (cp = getconf("*ncpu")) {
+ ncpu = strtol(cp, 0, 0);
+ if (ncpu < 1)
+ ncpu = 1;
+ }
+ machs = 1;
+ for (i = 1; i < ncpu; i++) {
+ sprint(node, "cpu/%d/availability", i);
+ if (xenstore_read(node, buf, sizeof buf) <= 0)
+ break;
+ print("%s: %s\n", node, buf);
+ if (strcmp(buf, "online") == 0) {
+ machs |= 1<<i;
+ conf.nmach++;
+ }
+ }
+ if (conf.nmach > 1) {
+		print("Sorry, SMP not supported yet: 1 of %lud CPUs started\n", conf.nmach);
+ conf.nmach = 1;
+ }
+}
+
+static void
+shutdown(void)
+{
+ HYPERVISOR_shutdown(1);
+}
+
+int xenintrenable(Vctl *v);
+int xenintrvecno(int irq);
+int xenintrdisable(int irq);
+void xentimerenable(void);
+uvlong xentimerread(uvlong*);
+void xentimerset(uvlong);
+
+PCArch archxen = {
+.id= "Xen",
+.ident= identify,
+.reset= shutdown,
+.intrinit= intrinit,
+.intrenable= xenintrenable,
+.intrvecno= xenintrvecno,
+.intrdisable= xenintrdisable,
+.clockenable= xentimerenable,
+.fastclock= xentimerread,
+.timerset= xentimerset,
+};
+
+/*
+ * Placeholders to satisfy external references in generic devarch.c
+ */
+ulong getcr4(void) { return 0; }
+void putcr4(ulong) {}
+int inb(int) { return 0; }
+ushort ins(int) { return 0; }
+ulong inl(int) { return 0; }
+void outb(int, int) {}
+void outs(int, ushort) {}
+void outl(int, ulong) {}
+void i8042reset(void) {}
+void i8253enable(void) {}
+void i8253init(void) {}
+void i8253link(void) {}
+uvlong i8253read(uvlong*) { return 0; }
+void i8253timerset(uvlong) {}
+int i8259disable(int) { return 0; }
+int i8259enable(Vctl*) { return 0; }
+void i8259init(void) {}
+int i8259isr(int) { return 0; }
+void i8259on(void) {}
+void i8259off(void) {}
+int i8259vecno(int) { return 0; }
+int mtrrprint(char*, long) { return 0; }
+int mtrr(uvlong, uvlong, char *) { return 0; }
+
+/*
+ * XXX until fpsave is debugged
+ */
+void
+fpssesave(FPsave* f)
+{
+ fpx87save(f);
+}
+
+void
+fpsserestore(FPsave* f)
+{
+ fpx87restore(f);
+}
--- /dev/null
+++ b/sys/src/9/xen/cppx
@@ -1,0 +1,41 @@
+#!/bin/rc
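+# cppx: preprocess with cpp while keeping #define/#undef/#error lines in
+# the output: such lines are emitted as quoted strings prefixed with "@
+# (embedded quotes become £) so cpp passes them through, and the sed
+# pass below unquotes them again.  #include lines are dropped.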
+awk '
+function qq(s) {
+ gsub("\"", "£", s)
+ return "\"@" s "\""
+}
+
+/^#include/ { next
+}
+/^#define.*\\$/ {
+ save[n++] = $0
+ print qq($0 "\\")
+ next
+}
+(n > 0) && /\\$/ {
+ save[n++] = $0
+ print qq($0 "\\")
+ next
+}
+(n > 0) {
+ save[n++] = $0
+ print qq($0)
+ for (i = 0; i < n; i++) print save[i]
+ n = 0
+ next
+}
+/^# *((define)|(error)).*[^\\]*$/ {
+ print qq($0)
+ next
+}
+/^# *undef.*[^\\]*$/ {
+ print qq($0)
+ next
+}
+' $* |
+cpp -P |
+sed -e 's/£/"/g' -e 's/^"@(.*\\)\\"$/\1/' -e 's/^"@(.*)"$/\1/'
--- /dev/null
+++ b/sys/src/9/xen/dat.h
@@ -1,0 +1,53 @@
+#include "../pc/dat.h"
+
+typedef unsigned char uint8_t;
+typedef unsigned char uint8;
+typedef unsigned short uint16_t;
+typedef unsigned long uint32_t;
+typedef unsigned long long uint64_t;
+typedef char int8_t;
+typedef short int16_t;
+typedef long int32_t;
+typedef long long int64_t;
+
+#define __attribute__(x)
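+
+/* errno values, presumably for code pulled in via the Xen headers */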
+enum {
+ EINVAL,
+ EACCES,
+ EEXIST,
+ EISDIR,
+ ENOENT,
+ ENOMEM,
+ ENOSPC,
+ EIO,
+ ENOTEMPTY,
+ ENOSYS,
+ EROFS,
+ EBUSY,
+ EAGAIN,
+ EISCONN,
+};
+
+#include "xendat.h"
+
+#undef mk_unsigned_long
+#define mk_unsigned_long(x) ((unsigned long)(x))
+
+#ifndef set_xen_guest_handle
+#define set_xen_guest_handle(hnd, val) hnd = val
+#endif
+
+extern ulong hypervisor_virt_start;
+extern ulong *patomfn, *matopfn;
+extern start_info_t *xenstart;
+extern ulong xentop;
+extern shared_info_t *HYPERVISOR_shared_info;
+
+/*
+ * Fake kmap
+ * XXX is this still viable?
+ */
+#undef VA
+#define VA(k) ((ulong)(k))
+#define kmap(p) (KMap*)((p)->pa|KZERO)
+#define kunmap(k)
--- /dev/null
+++ b/sys/src/9/xen/devrtc.c
@@ -1,0 +1,96 @@
+/*
+ * Xen wall clock
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+enum{
+ Qdir = 0,
+ Qrtc,
+};
+
+Dirtab rtcdir[]={
+ ".", {Qdir, 0, QTDIR}, 0, 0555,
+ "rtc", {Qrtc, 0}, 0, 0664,
+};
+
+static long
+rtcread(Chan *c, void *a, long n, vlong offset)
+{
+ if(c->qid.type & QTDIR)
+ return devdirread(c, a, n, rtcdir, nelem(rtcdir), devgen);
+
+ switch((ulong)c->qid.path){
+ case Qrtc:
+ return readnum((ulong)offset, a, n, xenwallclock(), 12);
+ }
+ error(Ebadarg);
+ return 0;
+}
+
+static long
+rtcwrite(Chan*c, void*, long n, vlong)
+{
+ switch((ulong)c->qid.path){
+ case Qrtc:
+ return n;
+ }
+ error(Eperm);
+ return 0;
+}
+
+static Chan*
+rtcattach(char* spec)
+{
+ return devattach('r', spec);
+}
+
+static Walkqid*
+rtcwalk(Chan* c, Chan *nc, char** name, int nname)
+{
+ return devwalk(c, nc, name, nname, rtcdir, nelem(rtcdir), devgen);
+}
+
+static int
+rtcstat(Chan* c, uchar* dp, int n)
+{
+ return devstat(c, dp, n, rtcdir, nelem(rtcdir), devgen);
+}
+
+static Chan*
+rtcopen(Chan* c, int omode)
+{
+ return devopen(c, omode, rtcdir, nelem(rtcdir), devgen);
+}
+
+static void
+rtcclose(Chan*)
+{
+}
+
+Dev rtcdevtab = {
+ 'r',
+ "rtc",
+
+ devreset,
+ devinit,
+ devshutdown,
+ rtcattach,
+ rtcwalk,
+ rtcstat,
+ rtcopen,
+ devcreate,
+ rtcclose,
+ rtcread,
+ devbread,
+ rtcwrite,
+ devbwrite,
+ devremove,
+ devwstat,
+};
+
--- /dev/null
+++ b/sys/src/9/xen/devxenstore.c
@@ -1,0 +1,590 @@
+/*
+ * Driver for xenstore - database shared between domains, used by xenbus to
+ * communicate configuration info.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../pc/io.h"
+
+#define LOG(a)
+
+typedef struct Aux Aux;
+
+enum {
+ Qtopdir,
+ Qctl,
+ Qwatch,
+ WRITING = 0,
+ READING,
+ WATCHING,
+ MAXIO = 8*1024,
+};
+
+Dirtab xsdir[] = {
+ ".", {Qtopdir, 0, QTDIR}, 0, 0555,
+ "xenstore", {Qctl, 0}, 0, 0660,
+ "xenwatch", {Qwatch, 0}, 0, 0440,
+};
+
+struct {
+ struct xenstore_domain_interface *intf;
+ struct xsd_sockmsg hdr;
+ int hdrvalid;
+ int evtchn;
+ int nextreqid;
+ Aux *rhead;
+ Aux *kernelaux;
+ Queue *evq;
+ Rendez wr;
+ Rendez rr;
+ QLock;
+ Lock rlock;
+} xenstore;
+
+struct Aux {
+ QLock;
+ Rendez qr;
+ Queue *ioq;
+ Aux *next;
+ int state;
+ int reqid;
+};
+
+static char Ephase[] = "phase error";
+static char Eproto[] = "protocol error";
+static char NodeShutdown[] = "control/shutdown";
+
+static void xenbusproc(void*);
+
+static int
+notfull(void*)
+{
+ struct xenstore_domain_interface *xs = xenstore.intf;
+
+ return (xs->req_prod-xs->req_cons) < XENSTORE_RING_SIZE;
+}
+
+static int
+notempty(void*)
+{
+ struct xenstore_domain_interface *xs = xenstore.intf;
+
+ return xs->rsp_prod > xs->rsp_cons;
+}
+
+static int
+ishead(void* a)
+{
+ return xenstore.rhead == a;
+}
+
+static void
+xsintr(Ureg*, void*)
+{
+ LOG(dprint("xsintr\n");)
+ wakeup(&xenstore.rr);
+ wakeup(&xenstore.wr);
+}
+
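+/*
+ * Copy len bytes into the shared request ring, taken from queue q if
+ * given, otherwise from buf.  The ring is a circular buffer of
+ * XENSTORE_RING_SIZE bytes, so a copy may wrap (hence the two-part
+ * move).  When the ring is full, notify the peer and sleep until
+ * space appears.
+ */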
+static void
+xwrite(Queue *q, char *buf, int len)
+{
+ struct xenstore_domain_interface *xs;
+ int m, n;
+ XENSTORE_RING_IDX idx;
+
+ xs = xenstore.intf;
+ while (len > 0) {
+ n = XENSTORE_RING_SIZE - (xs->req_prod - xs->req_cons);
+ if (n == 0) {
+ xenchannotify(xenstore.evtchn);
+ sleep(&xenstore.wr, notfull, 0);
+ continue;
+ }
+ if (n > len)
+ n = len;
+ idx = MASK_XENSTORE_IDX(xs->req_prod);
+ m = XENSTORE_RING_SIZE - idx;
+ if (m > n)
+ m = n;
+ if (q)
+ qread(q, xs->req+idx, m);
+ else
+ memmove(xs->req+idx, buf, m);
+ if (m < n) {
+ if (q)
+ qread(q, xs->req, n-m);
+ else
+ memmove(xs->req, buf+m, n-m);
+ }
+ coherence();
+ xs->req_prod += n;
+ xenchannotify(xenstore.evtchn);
+ if (buf)
+ buf += n;
+ len -= n;
+ }
+}
+
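+/*
+ * The converse of xwrite: copy len bytes from the shared response ring
+ * into queue q or buffer buf (discarding them if both are nil), waiting
+ * while the ring is empty.  Before processes exist it spins with
+ * HYPERVISOR_yield instead of sleeping.
+ */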
+static void
+xread(Queue *q, char *buf, int len)
+{
+ struct xenstore_domain_interface *xs = xenstore.intf;
+ int n, m;
+ XENSTORE_RING_IDX idx;
+
+ for (n = len; n > 0; n -= m) {
+ while (xs->rsp_prod == xs->rsp_cons) {
+ xenchannotify(xenstore.evtchn);
+ if (up == 0)
+ HYPERVISOR_yield();
+ else
+ sleep(&xenstore.rr, notempty, 0);
+ }
+ idx = MASK_XENSTORE_IDX(xs->rsp_cons);
+ m = xs->rsp_prod - xs->rsp_cons;
+ if (m > n)
+ m = n;
+ if (m > XENSTORE_RING_SIZE - idx)
+ m = XENSTORE_RING_SIZE - idx;
+ if (q)
+ qwrite(q, xs->rsp+idx, m);
+ else if (buf) {
+ memmove(buf, xs->rsp+idx, m);
+ buf += m;
+ }
+ coherence();
+ xs->rsp_cons += m;
+ }
+ xenchannotify(xenstore.evtchn);
+}
+
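+/*
+ * Perform one xenstore RPC: send the request waiting in aux->ioq
+ * (already framed with an xsd_sockmsg header) and queue the matching
+ * response.  Each request gets a unique req_id; responses may arrive in
+ * any order, so waiters queue on xenstore.rhead and whoever is at the
+ * head reads headers, handing responses for other requests to their
+ * owners.  A watch reader (state WATCHING) sends nothing and waits with
+ * reqid 0, which is what watch events carry.
+ */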
+static void
+xsrpc(Aux *aux)
+{
+ Queue *q;
+ Aux *l, *r, **lp;
+ struct xsd_sockmsg hdr;
+ long n;
+
+ q = aux->ioq;
+
+ if (aux->state == WATCHING)
+ aux->reqid = 0;
+ else {
+ /* get the request header and check validity */
+ if (qlen(q) < sizeof hdr)
+ error(Eproto);
+ qread(q, &hdr, sizeof hdr);
+ n = hdr.len;
+ if (qlen(q) != n)
+ error(Eproto);
+ qlock(&xenstore);
+ /* generate a unique request id */
+ aux->reqid = ++xenstore.nextreqid;
+ hdr.req_id = aux->reqid;
+ hdr.tx_id = 0;
+ /* send the request */
+ xwrite(0, (char*)&hdr, sizeof hdr);
+ xwrite(q, 0, n);
+ qunlock(&xenstore);
+ }
+
+ /* join list of requests awaiting response */
+ ilock(&xenstore.rlock);
+ if (xenstore.rhead == 0) {
+ aux->next = 0;
+ xenstore.rhead = aux;
+ } else {
+ aux->next = xenstore.rhead->next;
+ xenstore.rhead->next = aux;
+ }
+ iunlock(&xenstore.rlock);
+
+ /* loop until matching response header has been received */
+ if (waserror()) {
+ ilock(&xenstore.rlock);
+ for (lp = &xenstore.rhead; *lp && *lp != aux; lp = &(*lp)->next)
+ ;
+ if (*lp != 0) {
+ *lp = (*lp)->next;
+ if (lp == &xenstore.rhead && *lp)
+ wakeup(&(*lp)->qr);
+ }
+ iunlock(&xenstore.rlock);
+ nexterror();
+ }
+ for (;;) {
+ /* wait until this request reaches head of queue */
+ if (xenstore.rhead != aux)
+ sleep(&aux->qr, ishead, aux);
+ /* wait until a response header (maybe for another request) has been read */
+ if (!xenstore.hdrvalid) {
+ xread(0, (char*)&xenstore.hdr, sizeof xenstore.hdr);
+ xenstore.hdrvalid = 1;
+ }
+ if (xenstore.hdr.req_id == aux->reqid)
+ break;
+ /* response was for a different request: move matching request to head of queue */
+ ilock(&xenstore.rlock);
+ for (l = xenstore.rhead; r = l->next; l = r)
+ if (xenstore.hdr.req_id == r->reqid) {
+ l->next = r->next;
+ r->next = xenstore.rhead;
+ xenstore.rhead = r;
+ break;
+ }
+ iunlock(&xenstore.rlock);
+ if (r) {
+ /* wake the matching request */
+ wakeup(&r->qr);
+ } else {
+ /* response without a request: should be a watch event */
+ xenstore.hdrvalid = 0;
+ xread(0, 0, xenstore.hdr.len);
+ continue;
+ }
+ }
+
+ /* queue the response header, and data if any, for the caller to read */
+ qwrite(q, &xenstore.hdr, sizeof xenstore.hdr);
+ xenstore.hdrvalid = 0;
+ /* read the data, if any */
+ if (xenstore.hdr.len > 0)
+ xread(q, 0, xenstore.hdr.len);
+
+ /* remove finished request and wake the next request on the queue */
+ ilock(&xenstore.rlock);
+ xenstore.rhead = aux->next;
+ iunlock(&xenstore.rlock);
+ poperror();
+ if (xenstore.rhead != 0)
+ wakeup(&xenstore.rhead->qr);
+}
+
+static void
+xsreset(void)
+{
+ LOG(dprint("xsreset\n");)
+}
+
+static void
+xsinit(void)
+{
+ intrenable(xenstore.evtchn, xsintr, 0, BUSUNKNOWN, "Xen store");
+ kproc("xenbus", xenbusproc, 0);
+}
+
+static Chan*
+xsattach(char *spec)
+{
+ return devattach('x', spec);
+}
+
+static Walkqid*
+xswalk(Chan *c, Chan *nc, char **name, int nname)
+{
+ return devwalk(c, nc, name, nname, xsdir, nelem(xsdir), devgen);
+}
+
+static int
+xsstat(Chan *c, uchar *dp, int n)
+{
+ return devstat(c, dp, n, xsdir, nelem(xsdir), devgen);
+}
+
+static Aux*
+auxalloc(int initstate)
+{
+ Aux *aux;
+ Queue *q;
+
+ aux = mallocz(sizeof(Aux), 1);
+ if (aux == 0)
+ return 0;
+ q = qopen(MAXIO, 0, 0, 0);
+ if (q == 0) {
+ free(aux);
+ return 0;
+ }
+ qnoblock(q, 1);
+ aux->state = initstate;
+ aux->ioq = q;
+ return aux;
+}
+
+static Chan*
+xsopen(Chan *c, int omode)
+{
+ Aux *aux;
+ int state;
+
+ c = devopen(c, omode, xsdir, nelem(xsdir), devgen);
+ state = WRITING;
+ switch ((ulong)c->qid.path) {
+ case Qwatch:
+ state = WATCHING;
+ /* fall through */
+ case Qctl:
+ aux = auxalloc(state);
+ if (aux == 0) {
+ c->flag &= ~COPEN;
+ error(Enomem);
+ }
+ c->aux = aux;
+ break;
+ }
+ return c;
+}
+
+static void
+xsclose(Chan* c)
+{
+ Aux *aux;
+
+ if ((c->flag&COPEN) == 0)
+ return;
+
+ switch ((ulong)c->qid.path) {
+ case Qwatch:
+ case Qctl:
+ if ((aux = (Aux*)c->aux) != 0) {
+ qfree(aux->ioq);
+ free(aux);
+ c->aux = 0;
+ }
+ break;
+ }
+}
+
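+/*
+ * A client writes a complete request into the per-fd queue, then reads:
+ * the first read performs the RPC (state WRITING -> READING) and
+ * returns the queued response.  Reads on xenwatch block in xsrpc until
+ * a watch event arrives.
+ */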
+static long
+xsread(Chan *c, void *a, long n, vlong off)
+{
+ Aux *aux;
+ Queue *q;
+ long nr;
+
+ USED(off);
+ if (c->qid.type == QTDIR)
+ return devdirread(c, a, n, xsdir, nelem(xsdir), devgen);
+
+ aux = (Aux*)c->aux;
+ qlock(aux);
+ if (waserror()) {
+ qunlock(aux);
+ nexterror();
+ }
+ q = aux->ioq;
+ switch (aux->state) {
+ case WRITING:
+ if (qlen(q) == 0)
+ error(Ephase);
+ xsrpc(aux);
+ aux->state = READING;
+ break;
+ case WATCHING:
+ if (qlen(q) == 0)
+ xsrpc(aux);
+ break;
+ }
+ if (!qcanread(q))
+ nr = 0;
+ else
+ nr = qread(q, a, n);
+ qunlock(aux);
+ poperror();
+ return nr;
+}
+
+static long
+xswrite(Chan *c, void *a, long n, vlong off)
+{
+ Aux *aux;
+ Queue *q;
+ long nr;
+
+ if (c->qid.type == QTDIR)
+ error(Eperm);
+ if ((ulong)c->qid.path == Qwatch)
+ error(Ebadusefd);
+
+ aux = (Aux*)c->aux;
+ qlock(aux);
+ if (waserror()) {
+ qunlock(aux);
+ nexterror();
+ }
+ q = aux->ioq;
+ if ((off == 0 || aux->state == READING) && qlen(q) > 0)
+ qflush(q);
+ aux->state = WRITING;
+ nr = qwrite(aux->ioq, a, n);
+ qunlock(aux);
+ poperror();
+ return nr;
+}
+
+Dev xenstoredevtab = {
+ 'x',
+ "xenstore",
+
+ xsreset,
+ xsinit,
+ devshutdown,
+ xsattach,
+ xswalk,
+ xsstat,
+ xsopen,
+ devcreate,
+ xsclose,
+ xsread,
+ devbread,
+ xswrite,
+ devbwrite,
+ devremove,
+ devwstat,
+};
+
+static char*
+xscmd(Aux *aux, char *buf, int cmd, char *s, char *val)
+{
+ struct xsd_sockmsg *msg;
+ char *arg;
+ long n;
+
+ msg = (struct xsd_sockmsg*)buf;
+ arg = buf + sizeof(*msg);
+ msg->type = cmd;
+ msg->len = strlen(s)+1;
+ if (val) {
+ msg->len += strlen(val);
+ if (cmd == XS_WATCH)
+ msg->len++; /* stupid special case */
+ }
+ strcpy(arg, s);
+ if (val)
+ strcpy(arg+strlen(s)+1, val);
+ n = sizeof(*msg)+msg->len;
+ if (up == 0) {
+ msg->req_id = 1;
+ msg->tx_id = 0;
+ xwrite(0, buf, n);
+ xread(0, buf, sizeof(*msg));
+ xread(0, arg, msg->len);
+ } else {
+ qlock(aux);
+ if (qlen(aux->ioq) > 0)
+ qflush(aux->ioq);
+ qwrite(aux->ioq, buf, n);
+ xsrpc(aux);
+ qread(aux->ioq, buf, sizeof(*msg));
+ LOG(dprint("xs: type %d req_id %d len %d\n", msg->type, msg->req_id, msg->len);)
+ // XXX buffer overflow
+ qread(aux->ioq, arg, msg->len);
+ qunlock(aux);
+ }
+ arg[msg->len] = 0;
+ if (msg->type == XS_ERROR) {
+ return 0;
+ }
+ return arg;
+}
+
+static void
+intfinit(void)
+{
+ if (xenstore.intf == 0) {
+ xenstore.intf = (struct xenstore_domain_interface*)mmumapframe(XENBUS, xenstart->store_mfn);
+ xenstore.evtchn = xenstart->store_evtchn;
+ xenstore.kernelaux = auxalloc(WRITING);
+ }
+}
+
+void
+xenstore_write(char *s, char *val)
+{
+ char buf[512];
+
+ intfinit();
+ xscmd(xenstore.kernelaux, buf, XS_WRITE, s, val);
+}
+
+int
+xenstore_read(char *s, char *val, int len)
+{
+ char buf[512];
+ char *p;
+
+ intfinit();
+ p = xscmd(xenstore.kernelaux, buf, XS_READ, s, nil);
+ if (p == 0)
+ return -1;
+ strecpy(val, val+len, p);
+ return 1;
+}
+
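+/*
+ * Helpers for hierarchical keys: temporarily append node to dir (which
+ * must have room for it), do the access, then restore dir.
+ */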
+void
+xenstore_setd(char *dir, char *node, int value)
+{
+ int off;
+ char buf[12];
+
+ off = strlen(dir);
+ sprint(dir+off, "%s", node);
+ sprint(buf, "%ud", value);
+ xenstore_write(dir, buf);
+ dir[off] = 0;
+}
+
+int
+xenstore_gets(char *dir, char *node, char *buf, int buflen)
+{
+ int off;
+ int n;
+
+ off = strlen(dir);
+ sprint(dir+off, "%s", node);
+ n = xenstore_read(dir, buf, buflen);
+ dir[off] = 0;
+ return n;
+}
+
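+/*
+ * Kernel process: watch control/shutdown in xenstore and act on
+ * "poweroff" and "reboot" requests from the toolstack; anything else
+ * is logged and the node cleared.
+ */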
+static void
+xenbusproc(void*)
+{
+ Chan *c;
+ Aux *aux;
+ char *p;
+ struct xsd_sockmsg msg;
+ char buf[512];
+ int n, m;
+
+ c = namec("#x/xenstore", Aopen, ORDWR, 0);
+ aux = (Aux*)c->aux;
+ c = namec("#x/xenwatch", Aopen, OREAD, 0);
+ xscmd(aux, buf, XS_WATCH, NodeShutdown, "$");
+ for (;;) {
+ xsread(c, &msg, sizeof(msg), 0);
+ for (n = msg.len; n > 0; n -= m)
+ m = xsread(c, buf, msg.len, sizeof(msg));
+ buf[msg.len] = 0;
+ if (strcmp(buf, NodeShutdown) != 0)
+ continue;
+ p = xscmd(aux, buf, XS_READ, NodeShutdown, nil);
+ if (p == nil)
+ continue;
+ if (strcmp(p, "poweroff") == 0)
+ reboot(nil, nil, 0);
+ else if (strcmp(p, "reboot") == 0)
+ exit(0);
+ else {
+ print("xenbus: %s=%s\n", NodeShutdown, p);
+ xscmd(aux, buf, XS_WRITE, NodeShutdown, "");
+ }
+ }
+}
--- /dev/null
+++ b/sys/src/9/xen/dpart.c
@@ -1,0 +1,248 @@
+#include <u.h>
+#include <libc.h>
+#include <disk.h>
+
+typedef void Fs;
+#include "/sys/src/boot/pc/dosfs.h"
+
+enum {
+ Npart = 32
+};
+
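+/* little-endian field extraction for on-disk partition tables */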
+#define GSHORT(p) (((p)[1]<<8)|(p)[0])
+#define GLONG(p) ((GSHORT(p+2)<<16)|GSHORT(p))
+
+int
+readdisk(Disk *d, void *buf, vlong off, int len)
+{
+ if(seek(d->fd, off, 0) == -1
+ || read(d->fd, buf, len) != len)
+ return -1;
+ return 0;
+}
+
+void
+addpart(Disk *d, char *name, ulong s, ulong e)
+{
+ print("%s: part %s %lud %lud\n", d->prefix, name, s, e);
+ fprint(d->ctlfd, "part %s %lud %lud\n", name, s, e);
+}
+
+int
+isdos(int t)
+{
+ return t==FAT12 || t==FAT16 || t==FATHUGE || t==FAT32 || t==FAT32X;
+}
+
+int
+isextend(int t)
+{
+ return t==EXTEND || t==EXTHUGE || t==LEXTEND;
+}
+
+
+/* build a cdboot partition if there is an embedded boot floppy image */
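+/* (El Torito: the boot record at sector 17 points to a boot catalog whose
+ * initial entry gives the boot image's media type and start sector) */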
+int
+cdpart(Disk *d)
+{
+ uchar buf[2048];
+ ulong a, n;
+ uchar *p;
+
+ if(readdisk(d, buf, 17*2048, 2048) == -1
+ || strcmp((char*)buf+1, "CD001\x01EL TORITO SPECIFICATION") != 0)
+ return 0;
+
+ p = buf + 0x47;
+ a = p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24);
+
+ if(readdisk(d, buf, a*2048, 2048) == -1
+ || memcmp((char*)buf, "\x01\x00\x00\x00", 4) != 0
+ || memcmp((char*)buf+30, "\x55\xAA", 2) != 0
+ || buf[0x20] != 0x88)
+ return 0;
+
+ p = buf+0x28;
+ a = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
+
+ switch(buf[0x21]) {
+ case 1: n = 1200*1024; break;
+ case 2: n = 1440*1024; break;
+ case 3: n = 2880*1024; break;
+ default: return 0;
+ }
+
+ a = a * (uvlong)2048 / d->secsize;
+ n /= d->secsize;
+ addpart(d, "cdboot", a, a+n);
+ return 1;
+}
+
+int
+p9part(Disk *d, char *name, ulong pstart)
+{
+ char partbuf[512];
+ char *field[4], *line[Npart+1], *name2;
+ ulong start, end;
+ int i, n;
+
+ name2 = smprint("%s%s", d->prefix, name);
+ d = opendisk(name2, 1, 0);
+ if(!d) {
+ fprint(2, "%s: %r\n", name2);
+ free(name2);
+ return 0;
+ }
+ free(name2);
+
+ if(readdisk(d, partbuf, 512, sizeof partbuf) == -1)
+ return 0;
+ partbuf[sizeof partbuf - 1] = '\0';
+ if(strncmp(partbuf, "part ", 5) != 0
+ || (n = getfields(partbuf, line, Npart+1, 0, "\n")) == 0)
+ return 0;
+ for(i = 0; i < n; i++) {
+ if(strncmp(line[i], "part ", 5) != 0)
+ break;
+ if(getfields(line[i], field, 4, 0, " ") != 4)
+ break;
+ start = strtoul(field[2], 0, 0);
+ end = strtoul(field[3], 0, 0);
+ if(start >= end)
+ break;
+ addpart(d, field[1], pstart+start, pstart+end);
+ }
+ return 0;
+}
+
+int
+mbrpart(Disk *d)
+{
+ uchar mbrbuf[512];
+ char name[10];
+ Dospart *dp;
+ ulong taboffset, start, end;
+ ulong firstxpart, nxtxpart;
+ int i, nplan9, havedos;
+
+#define readmbr() \
+ if(readdisk(d, mbrbuf, (uvlong)taboffset*512, sizeof mbrbuf) == -1 \
+ || mbrbuf[0x1FE] != 0x55 || mbrbuf[0x1FF] != 0xAA) \
+ return 0
+
+ if(d->secsize > 512)
+ return 0;
+ dp = (Dospart*)&mbrbuf[0x1BE];
+ taboffset = 0;
+
+ if(1) {
+ /* get the MBR (allowing for DMDDO) */
+ readmbr();
+ for(i = 0; i < 4; i++) {
+ if(dp[i].type == DMDDO) {
+ taboffset = 63;
+ readmbr();
+ i = -1; /* start over */
+ }
+ }
+ }
+
+ /*
+ * Read the partitions, first from the MBR and then
+ * from successive extended partition tables.
+ */
+ nplan9 = 0;
+ havedos = 0;
+ firstxpart = 0;
+ for(;;) {
+ readmbr();
+ nxtxpart = 0;
+ for(i = 0; i < 4; i++) {
+ /* partition offsets are relative to taboffset */
+ start = taboffset+GLONG(dp[i].start);
+ end = start+GLONG(dp[i].len);
+ if(dp[i].type == PLAN9) {
+ if(nplan9 == 0)
+ strcpy(name, "plan9");
+ else
+ sprint(name, "plan9.%d", nplan9);
+ addpart(d, name, start, end);
+ p9part(d, name, start);
+ nplan9++;
+ }
+
+ if(!havedos && isdos(dp[i].type)) {
+ havedos = 1;
+ addpart(d, "dos", start, end);
+ }
+
+ /* nxtxpart is relative to firstxpart (or 0), not taboffset */
+ if(isextend(dp[i].type))
+ nxtxpart = start-taboffset+firstxpart;
+ }
+ if(!nxtxpart)
+ break;
+ if(!firstxpart)
+ firstxpart = nxtxpart;
+ taboffset = nxtxpart;
+ }
+ return nplan9 + havedos;
+}
+
+void
+partall(void)
+{
+ Disk *d;
+ Dir *ent;
+ char *name;
+ int fd, i, n;
+
+ fd = open("#S", OREAD);
+ if(fd == -1) {
+ fprint(2, "No disk\n");
+ return;
+ }
+
+ while((n = dirread(fd, &ent)) > 0) {
+ for(i = 0; i < n; i++) {
+ if(ent[i].mode & DMDIR) {
+ name = smprint("#S/%s/data", ent[i].name);
+ d = opendisk(name, 1, 0);
+ if(!d) {
+ fprint(2, "%s: %r\n", name);
+ continue;
+ }
+ // XXX not safe yet: if(!mbrpart(d) && !cdpart(d) && !p9part(d, "data", 0))
+ if(!mbrpart(d) && !cdpart(d))
+ fprint(2, "%s: no partitions\n", name);
+ close(d->fd);
+ }
+ }
+ }
+ close(fd);
+}
+
+void
+main(int argc, char **argv)
+{
+ USED(argc, argv);
+
+ fmtinstall('r', errfmt);
+
+ bind("#c", "/dev", MBEFORE);
+ open("/dev/cons", OREAD);
+ open("/dev/cons", OWRITE);
+ open("/dev/cons", OWRITE);
+
+ partall();
+
+ close(0);
+ close(1);
+ close(2);
+
+ exec("/boot/boot2", argv);
+
+ exits(0);
+}
--- /dev/null
+++ b/sys/src/9/xen/etherxen.c
@@ -1,0 +1,494 @@
+/*
+ * Xen virtual network interface frontend
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+#include "etherif.h"
+
+#define LOG(a)
+
+enum {
+ Nvif = 4,
+ Ntb = 16,
+ Nrb = 32,
+};
+
+typedef struct Ctlr Ctlr;
+typedef union Txframe Txframe;
+typedef union Rxframe Rxframe;
+
+struct Ctlr {
+ int attached;
+ int backend;
+ int vifno;
+ int evtchn;
+ int rxcopy;
+ Txframe *txframes;
+ Txframe *freetxframe;
+ Rxframe *rxframes;
+ netif_tx_front_ring_t txring;
+ netif_rx_front_ring_t rxring;
+ int *txrefs;
+ int *rxrefs;
+ int txringref;
+ int rxringref;
+ Lock txlock;
+ QLock attachlock;
+ Rendez wtxframe;
+ Rendez wtxblock;
+
+ ulong interrupts;
+ ulong transmits;
+ ulong receives;
+ ulong txerrors;
+ ulong rxerrors;
+ ulong rxoverflows;
+};
+
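+/*
+ * Tx and rx buffers are whole pages so they can be granted to the
+ * backend domain.  Free tx frames are chained through tf.next, which
+ * overlays the start of the page while the frame is idle.
+ */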
+union Txframe {
+ struct {
+ Txframe *next;
+ char data[2];
+ } tf;
+ uchar page[BY2PG];
+};
+
+union Rxframe {
+ uchar page[BY2PG];
+};
+
+static int nvif;
+
+/*
+ * conversions to machine page numbers, pages and addresses
+ */
+#define MFN(pa) (patomfn[(pa)>>PGSHIFT])
+#define MFNPG(pa) (MFN(pa)<<PGSHIFT)
+#define PA2MA(pa) (MFNPG(pa) | PGOFF(pa))
+#define VA2MA(va) PA2MA(PADDR(va))
+
+static int
+puttxrequest(Ctlr *ctlr, netif_tx_request_t *tr)
+{
+ netif_tx_request_t *req;
+ int i, notify;
+
+ LOG(dprint("puttxrequest id %d ref %d size %d\n", tr->id, tr->gref, tr->size);)
+ i = ctlr->txring.req_prod_pvt;
+ req = RING_GET_REQUEST(&ctlr->txring, i);
+ memmove(req, tr, sizeof(*req));
+ ctlr->txring.req_prod_pvt = i+1;
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ctlr->txring, notify);
+ return notify;
+}
+
+static int
+putrxrequest(Ctlr *ctlr, netif_rx_request_t *rr)
+{
+ netif_rx_request_t *req;
+ int i;
+ int notify;
+
+ LOG(dprint("putrxrequest %d %d\n", rr->id, rr->gref);)
+ i = ctlr->rxring.req_prod_pvt;
+ req = RING_GET_REQUEST(&ctlr->rxring, i);
+ memmove(req, rr, sizeof(*req));
+ ctlr->rxring.req_prod_pvt = i+1;
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ctlr->rxring, notify);
+ return notify;
+}
+
+static int
+gettxresponse(Ctlr *ctlr, netif_tx_response_t *tr)
+{
+ int i, avail;
+ netif_tx_response_t *rx;
+
+ RING_FINAL_CHECK_FOR_RESPONSES(&ctlr->txring, avail);
+ if (!avail)
+ return 0;
+ i = ctlr->txring.rsp_cons;
+ rx = RING_GET_RESPONSE(&ctlr->txring, i);
+ LOG(dprint("gettxresponse id %d status %d\n", rx->id, rx->status);)
+ if(rx->status)
+ ctlr->txerrors++;
+ *tr = *rx;
+ ctlr->txring.rsp_cons = ++i;
+ return 1;
+}
+
+static int
+getrxresponse(Ctlr *ctlr, netif_rx_response_t* rr)
+{
+ int i, avail;
+ netif_rx_response_t *rx;
+
+ RING_FINAL_CHECK_FOR_RESPONSES(&ctlr->rxring, avail);
+ if (!avail)
+ return 0;
+ i = ctlr->rxring.rsp_cons;
+ rx = RING_GET_RESPONSE(&ctlr->rxring, i);
+ LOG(dprint("getrxresponse id %d offset %d flags %ux status %d\n", rx->id, rx->offset, rx->flags, rx->status);)
+ *rr = *rx;
+ ctlr->rxring.rsp_cons = ++i;
+ return 1;
+}
+
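+/*
+ * Build the shared tx and rx rings in the two pages at a, grant the
+ * backend write access to them, and return the number of bytes used.
+ */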
+static int
+ringinit(Ctlr *ctlr, char *a)
+{
+ netif_tx_sring_t *txr;
+ netif_rx_sring_t *rxr;
+
+ txr = (netif_tx_sring_t*)a;
+ memset(txr, 0, BY2PG);
+ SHARED_RING_INIT(txr);
+ FRONT_RING_INIT(&ctlr->txring, txr, BY2PG);
+ ctlr->txringref = shareframe(ctlr->backend, txr, 1);
+
+ rxr = (netif_rx_sring_t*)(a+BY2PG);
+ SHARED_RING_INIT(rxr);
+ FRONT_RING_INIT(&ctlr->rxring, rxr, BY2PG);
+ ctlr->rxringref = shareframe(ctlr->backend, rxr, 1);
+
+ return 2*BY2PG;
+}
+
+static int
+vifsend(Ctlr *ctlr, Block *bp)
+{
+ netif_tx_request_t tr;
+ Txframe *tx;
+ int id;
+
+ ilock(&ctlr->txlock);
+ tx = ctlr->freetxframe;
+ ctlr->freetxframe = tx->tf.next;
+ iunlock(&ctlr->txlock);
+ id = tx - ctlr->txframes;
+ tr.gref = ctlr->txrefs[id];
+ tr.offset = tx->tf.data - (char*)tx;
+ tr.flags = 0; // XXX checksum?
+ tr.id = id;
+ tr.size = BLEN(bp);
+ memmove(tx->tf.data, bp->rp, tr.size);
+ return puttxrequest(ctlr, &tr);
+}
+
+static int
+vifsenddone(Ctlr *ctlr, netif_tx_response_t *tr)
+{
+ Txframe *tx;
+
+ tx = &ctlr->txframes[tr->id]; // XXX check validity of id
+ ilock(&ctlr->txlock);
+ tx->tf.next = ctlr->freetxframe;
+ ctlr->freetxframe = tx;
+ iunlock(&ctlr->txlock);
+ return 1;
+}
+
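+/*
+ * Post a receive buffer.  With rx-copy the backend copies into a page
+ * that stays granted to it; otherwise the page itself is donated to the
+ * backend and reclaimed in vifrecvdone via acceptframe.
+ */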
+static int
+vifrecv(Ctlr *ctlr, Rxframe *rx)
+{
+ netif_rx_request_t rr;
+ int id;
+ int ref;
+
+ id = rx - ctlr->rxframes;
+ if (ctlr->rxcopy)
+ ref = ctlr->rxrefs[id];
+ else {
+ ref = donateframe(ctlr->backend, rx);
+ ctlr->rxrefs[id] = ref;
+ }
+ rr.id = id;
+ rr.gref = ref;
+ return putrxrequest(ctlr, &rr);
+}
+
+static int
+vifrecvdone(Ether *ether, netif_rx_response_t *rr)
+{
+ Ctlr *ctlr;
+ Rxframe *rx;
+ Block *bp;
+ int len;
+
+ ctlr = ether->ctlr;
+ rx = &ctlr->rxframes[rr->id]; // XXX check validity of id
+ if (!ctlr->rxcopy)
+ acceptframe(ctlr->rxrefs[rr->id], rx);
+ if ((len = rr->status) <= 0) {
+ ctlr->rxerrors++;
+ vifrecv(ctlr, rx);
+ return 1;
+ }
+ if(len > sizeof(Etherpkt) || (bp = iallocb(sizeof(Etherpkt))) == nil) {
+ ctlr->rxoverflows++;
+ vifrecv(ctlr, rx);
+ return 1;
+ }
+
+ ctlr->receives++;
+ memmove(bp->base, rx->page + rr->offset, len);
+ vifrecv(ctlr, rx);
+
+ bp->rp = bp->base;
+ bp->wp = bp->rp + len;
+ bp->free = 0;
+ bp->next = 0;
+ bp->list = 0;
+ if (rr->flags & NETRXF_data_validated)
+ bp->flag |= Btcpck|Budpck;
+ etheriq(ether, bp, 1);
+ return 0;
+}
+
+static int
+wtxframe(void *a)
+{
+ return ((struct Ctlr*)a)->freetxframe != 0;
+}
+
+static int
+wtxblock(void *a)
+{
+ return qcanread(((struct Ether*)a)->oq);
+}
+
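+/*
+ * Output process: wait for a free tx frame and a queued output block,
+ * copy the block into the frame, post the tx request, and kick the
+ * event channel when the ring indicates the backend needs a notify.
+ */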
+static void
+etherxenproc(void *a)
+{
+ Ether *ether = a;
+ Ctlr *ctlr = ether->ctlr;
+ Block *bp;
+ int notify;
+
+ for (;;) {
+ while (ctlr->freetxframe == 0)
+ sleep(&ctlr->wtxframe, wtxframe, ctlr);
+ while ((bp = qget(ether->oq)) == 0)
+ sleep(&ctlr->wtxblock, wtxblock, ether);
+ notify = vifsend(ctlr, bp);
+ freeb(bp);
+ if (notify)
+ xenchannotify(ctlr->evtchn);
+ }
+}
+
+static void
+etherxentransmit(Ether *ether)
+{
+ Ctlr *ctlr;
+
+ ctlr = ether->ctlr;
+ ctlr->transmits++;
+ wakeup(&ctlr->wtxblock);
+}
+
+static void
+etherxenintr(Ureg*, void *a)
+{
+ Ether *ether = a;
+ Ctlr *ctlr = ether->ctlr;
+ int txnotify;
+ netif_tx_response_t tr;
+ netif_rx_response_t rr;
+
+ ctlr->interrupts++;
+ txnotify = 0;
+ while (getrxresponse(ctlr, &rr))
+ vifrecvdone(ether, &rr);
+ while (gettxresponse(ctlr, &tr)) {
+ if (vifsenddone(ctlr, &tr))
+ txnotify = 1;
+ }
+ if (txnotify)
+ wakeup(&ctlr->wtxframe);
+}
+
+static long
+etherxenctl(Ether *ether, void *buf, long n)
+{
+ uchar ea[Eaddrlen];
+ Cmdbuf *cb;
+
+ cb = parsecmd(buf, n);
+ if(cb->nf >= 2
+ && strcmp(cb->f[0], "ea")==0
+ && parseether(ea, cb->f[1]) == 0){
+ free(cb);
+ memmove(ether->ea, ea, Eaddrlen);
+ memmove(ether->addr, ether->ea, Eaddrlen);
+ return 0;
+ }
+ free(cb);
+ error(Ebadctl);
+ return -1; /* not reached */
+}
+
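+/*
+ * Publish ring references and the event channel under device/vif/<n>/
+ * in xenstore, advance the frontend state to Connected, then poll the
+ * backend's state node until it has connected too.
+ */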
+static void
+backendconnect(Ctlr *ctlr)
+{
+ char dir[64];
+ char buf[64];
+
+ sprint(dir, "device/vif/%d/", ctlr->vifno);
+ xenstore_setd(dir, "state", XenbusStateInitialising);
+ xenstore_setd(dir, "tx-ring-ref", ctlr->txringref);
+ xenstore_setd(dir, "rx-ring-ref", ctlr->rxringref);
+ xenstore_setd(dir, "event-channel", ctlr->evtchn);
+ print("etherxen: request-rx-copy=%d\n", ctlr->rxcopy);
+ if (ctlr->rxcopy)
+ xenstore_setd(dir, "request-rx-copy", 1);
+ xenstore_setd(dir, "state", XenbusStateConnected);
+ xenstore_gets(dir, "backend", buf, sizeof buf);
+ sprint(dir, "%s/", buf);
+ HYPERVISOR_yield();
+ xenstore_gets(dir, "state", buf, sizeof buf);
+ while (strtol(buf, 0, 0) != XenbusStateConnected) {
+ print("etherxen: waiting for vif %d to connect\n", ctlr->vifno);
+ tsleep(&up->sleep, return0, 0, 1000);
+ xenstore_gets(dir, "state", buf, sizeof buf);
+ }
+}
+
+static void
+etherxenattach(Ether *ether)
+{
+ Ctlr *ctlr;
+ char *p;
+ Txframe *tx;
+ int npage, i;
+
+ LOG(dprint("etherxenattach\n");)
+ ctlr = ether->ctlr;
+ qlock(&ctlr->attachlock);
+ if (ctlr->attached) {
+ qunlock(&ctlr->attachlock);
+ return;
+ }
+
+ npage = 2 + Ntb + Nrb;
+ p = (char*)xspanalloc(npage<<PGSHIFT, BY2PG, 0);
+ p += ringinit(ctlr, p);
+ ctlr->txrefs = malloc(Ntb*sizeof(int));
+ ctlr->rxrefs = malloc(Nrb*sizeof(int));
+ ctlr->txframes = (Txframe*)p;
+ for (i = 0; i < Ntb; i++, p += BY2PG) {
+ tx = (Txframe*)p;
+ if (i != Ntb-1)
+ tx->tf.next = tx + 1;
+ else
+ tx->tf.next = 0;
+ ctlr->txrefs[i] = shareframe(ctlr->backend, tx, 0);
+ }
+ ctlr->freetxframe = ctlr->txframes;
+ ctlr->rxframes = (Rxframe*)p;
+ for (i = 0; i < Nrb; i++, p += BY2PG) {
+ if (ctlr->rxcopy)
+ ctlr->rxrefs[i] = shareframe(ctlr->backend, (Rxframe*)p, 1);
+ vifrecv(ctlr, (Rxframe*)p);
+ }
+
+ ctlr->evtchn = xenchanalloc(ctlr->backend);
+ intrenable(ctlr->evtchn, etherxenintr, ether, BUSUNKNOWN, "vif");
+
+ kproc("vif", etherxenproc, ether);
+ backendconnect(ctlr);
+ ctlr->attached = 1;
+ qunlock(&ctlr->attachlock);
+}
+
+static void
+etherxenmulticast(void* arg, uchar* addr, int on)
+{
+ USED(arg, addr, on);
+}
+
+static long
+ifstat(Ether* ether, void* a, long n, ulong offset)
+{
+ Ctlr *ctlr;
+ char *buf, *p;
+ int l, len;
+
+ ctlr = ether->ctlr;
+ if(n == 0)
+ return 0;
+ if((p = malloc(READSTR)) == nil)
+ error(Enomem);
+ l = snprint(p, READSTR, "intr: %lud\n", ctlr->interrupts);
+ l += snprint(p+l, READSTR-l, "transmits: %lud\n", ctlr->transmits);
+ l += snprint(p+l, READSTR-l, "receives: %lud\n", ctlr->receives);
+ l += snprint(p+l, READSTR-l, "txerrors: %lud\n", ctlr->txerrors);
+ l += snprint(p+l, READSTR-l, "rxerrors: %lud\n", ctlr->rxerrors);
+ snprint(p+l, READSTR-l, "rxoverflows: %lud\n", ctlr->rxoverflows);
+
+ buf = a;
+ len = readstr(offset, buf, n, p);
+ free(p);
+
+ return len;
+}
+
+static int
+pnp(Ether* ether)
+{
+ uchar ea[Eaddrlen];
+ char dir[64];
+ char buf[64];
+ Ctlr *ctlr;
+ int domid, rxcopy;
+
+	if (nvif >= Nvif)
+ return -1;
+ sprint(dir, "device/vif/%d/", nvif);
+ if (xenstore_gets(dir, "backend-id", buf, sizeof buf) <= 0)
+ return -1;
+ domid = strtol(buf, 0, 0);
+ if (xenstore_gets(dir, "mac", buf, sizeof buf) <= 0)
+ return -1;
+ if (parseether(ea, buf) < 0)
+ return -1;
+ if (xenstore_gets(dir, "backend", buf, sizeof buf) <= 0)
+		return -1;
+ sprint(dir, "%s/", buf);
+ rxcopy = 0;
+ if (xenstore_gets(dir, "feature-rx-copy", buf, sizeof buf) >= 0)
+ rxcopy = strtol(buf, 0, 0);
+ ether->ctlr = ctlr = malloc(sizeof(Ctlr));
+ memset(ctlr, 0, sizeof(Ctlr));
+ ctlr->backend = domid;
+ ctlr->vifno = nvif++;
+ ctlr->rxcopy = rxcopy;
+
+ memmove(ether->ea, ea, sizeof ether->ea);
+ ether->mbps = 100; // XXX what speed?
+ ether->attach = etherxenattach;
+ ether->detach = nil;
+ ether->transmit = etherxentransmit;
+ ether->irq = -1;
+ ether->tbdf = BUSUNKNOWN;
+ ether->interrupt = etherxenintr;
+ ether->ifstat = ifstat;
+ ether->ctl = etherxenctl;
+ ether->promiscuous = nil;
+ ether->multicast = etherxenmulticast;
+ ether->arg = ether;
+ return 0;
+}
+
+void
+etherxenlink(void)
+{
+ addethercard("xen", pnp);
+}
--- /dev/null
+++ b/sys/src/9/xen/fns.h
@@ -1,0 +1,164 @@
+#include "../port/portfns.h"
+
+Dirtab* addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong));
+void archinit(void);
+void bootargs(ulong);
+ulong cankaddr(ulong);
+int cistrcmp(char*, char*);
+int cistrncmp(char*, char*, int);
+#define clearmmucache() /* x86 doesn't have one */
+void clockintr(Ureg*, void*);
+int (*cmpswap)(long*, long, long);
+int cmpswap486(long*, long, long);
+void (*coherence)(void);
+void cpuid(int, ulong regs[]);
+int cpuidentify(void);
+void cpuidprint(void);
+void (*cycles)(uvlong*);
+void delay(int);
+#define evenaddr(x) /* x86 doesn't care */
+void fpclear(void);
+void fpenv(FPsave*);
+void fpinit(void);
+void fpoff(void);
+void (*fprestore)(FPsave*);
+void (*fpsave)(FPsave*);
+void fpsserestore(FPsave*);
+void fpsserestore0(FPsave*);
+void fpssesave(FPsave*);
+void fpssesave0(FPsave*);
+ulong fpstatus(void);
+void fpx87restore(FPsave*);
+void fpx87save(FPsave*);
+ulong getcr4(void);
+char* getconf(char*);
+void guesscpuhz(int);
+void halt(void);
+void mwait(void*);
+void i8042reset(void);
+void i8253enable(void);
+void i8253init(void);
+void i8253link(void);
+uvlong i8253read(uvlong*);
+void i8253timerset(uvlong);
+int i8259disable(int);
+int i8259enable(Vctl*);
+void i8259init(void);
+int i8259isr(int);
+void i8259on(void);
+void i8259off(void);
+int i8259vecno(int);
+void idle(void);
+void idlehands(void);
+int inb(int);
+void insb(int, void*, int);
+ushort ins(int);
+void inss(int, void*, int);
+ulong inl(int);
+void insl(int, void*, int);
+int intrdisable(int, void (*)(Ureg *, void *), void*, int, char*);
+void intrenable(int, void (*)(Ureg*, void*), void*, int, char*);
+int ioalloc(int, int, int, char*);
+int isaconfig(char*, int, ISAConf*);
+void kbdenable(void);
+#define kmapinval()
+void lgdt(ushort[3]); // XXX remove and in l.s
+void lidt(ushort[3]); // XXX remove and in l.s
+void links(void);
+void ltr(ulong); // XXX remove?
+void mach0init(void);
+void mathinit(void);
+void mb386(void);
+void mb586(void);
+void mfence(void);
+void mmuflushtlb(Page*);
+void mmuinit(void);
+ulong mmukmap(ulong, ulong, int);
+int mmukmapsync(ulong);
+#define mmunewpage(x)
+ulong* mmuwalk(ulong*, ulong, int, int);
+int mtrr(uvlong, uvlong, char *);
+int mtrrprint(char *, long);
+void outb(int, int);
+void outsb(int, void*, int);
+void outs(int, ushort);
+void outss(int, void*, int);
+void outl(int, ulong);
+void outsl(int, void*, int);
+void printcpufreq(void);
+void procrestore(Proc*);
+void procsave(Proc*);
+void procsetup(Proc*);
+void procfork(Proc*);
+void putcr4(ulong);
+int rdmsr(int, vlong*);
+int screenprint(char*, ...); /* debugging */
+void (*screenputs)(char*, int);
+void touser(void*);
+void trap(Ureg*);
+void trapenable(int, void (*)(Ureg*, void*), void*, char*);
+void trapinit(void);
+int tas(void*);
+#define userureg(ur) (((ur)->cs & 0xFFFF) == UESEL)
+void vectortable(void);
+int wrmsr(int, vlong);
+uint xchgl(uint*, uint);
+uint xchgw(ushort*, uint);
+uint xchgb(uchar*, uint);
+
+#define waserror() (up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1]))
+#define KADDR(a) ((void*)((ulong)(a)|KZERO))
+#define PADDR(a) ((ulong)(a)&~KZERO)
+
+#define dcflush(a, b)
+
+/* Xen functions */
+#define rmb() coherence()
+#define wmb() coherence()
+void mb(void);
+void hypervisor_callback(void), failsafe_callback(void);
+void xenconsinit(void);
+ulong mmumapframe(ulong, ulong);
+void mmumapcpu0(void);
+void dprint(char *, ...);
+void xenupdate(ulong *ptr, ulong val);
+void xenupdatema(ulong *ptr, uvlong val);
+int xenpdptpin(ulong va);
+int xenpgdpin(ulong va);
+int xenptpin(ulong va);
+void xenptunpin(ulong va);
+void xenptswitch(ulong pa);
+void xentlbflush(void);
+int ffs(ulong);
+void xengrantinit(void);
+int xengrant(domid_t domid, ulong frame, int flags);
+int xengrantend(int ref);
+void acceptframe(int ref, void *va);
+int donateframe(int domid, void *va);
+int shareframe(int domid, void *va, int write);
+void xenchannotify(int);
+void xenupcall(Ureg*);
+ulong xenwallclock(void);
+int xenstore_read(char*, char*, int);
+void xenstore_write(char*, char*);
+void xenstore_setd(char *dir, char *node, int value);
+int xenstore_gets(char *dir, char *node, char *buf, int buflen);
+int xenchanalloc(int);
+
+long HYPERVISOR_set_timer_op(uvlong timeout);
+int HYPERVISOR_set_trap_table(trap_info_t *table);
+int HYPERVISOR_mmu_update(mmu_update_t *req, int count, int *success_count, domid_t domid);
+int HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, int *scount, domid_t domid);
+int HYPERVISOR_set_gdt(ulong *frame_list, int entries);
+int HYPERVISOR_stack_switch(ulong ss, ulong esp);
+int HYPERVISOR_set_callbacks(ulong evss, ulong evfunc, ulong fsss, ulong fsfunc);
+int HYPERVISOR_fpu_taskswitch(void);
+int HYPERVISOR_yield(void);
+int HYPERVISOR_block(void);
+int HYPERVISOR_shutdown(int);
+int HYPERVISOR_multicall(void *call_list, int nr_calls);
+int HYPERVISOR_event_channel_op(void *op);
+int HYPERVISOR_xen_version(int cmd, void *arg);
+int HYPERVISOR_console_io(int cmd, int count, char *str);
+int HYPERVISOR_grant_table_op(int cmd, gnttab_setup_table_t *setup, int count);
+int HYPERVISOR_memory_op(int cmd, struct xen_memory_reservation *arg);
--- /dev/null
+++ b/sys/src/9/xen/l.s
@@ -1,0 +1,678 @@
+#include "xendefs.h"
+#include "mem.h"
+
+/*
+ * Some machine instructions not handled by 8[al].
+ */
+#define OP16 BYTE $0x66
+#define DELAY BYTE $0xEB; BYTE $0x00 /* JMP .+2 */
+#define CPUID BYTE $0x0F; BYTE $0xA2 /* CPUID, argument in AX */
+#define WRMSR BYTE $0x0F; BYTE $0x30 /* WRMSR, argument in AX/DX (lo/hi) */
+#define RDTSC BYTE $0x0F; BYTE $0x31 /* RDTSC, result in AX/DX (lo/hi) */
+#define RDMSR BYTE $0x0F; BYTE $0x32 /* RDMSR, result in AX/DX (lo/hi) */
+#define HLT BYTE $0xF4
+#define BSFL BYTE $0xf; BYTE $0xbc; BYTE $0xc0 /* bsfl AX,AX */
+
+/*
+ * Macros for calculating offsets within the page directory base
+ * and page tables. Note that these are assembler-specific hence
+ * the '<<2'.
+ */
+#define PDO(a) (((((a))>>22) & 0x03FF)<<2)
+#define PTO(a) (((((a))>>12) & 0x03FF)<<2)
+
+/*
+ * Entry point from XEN's "linux" builder.
+ * At this point RAM from 0..4M ("physical") is mapped at KZERO,
+ * the kernel is loaded, we're running on a boot stack and a
+ * boot page table. The start_info structure describes the
+ * situation, and is pointed to by SI.
+ * The stack is the highest used address.
+ */
+TEXT _start(SB), $0
+ MOVL SP, xentop(SB) /* set global for top of mapped region */
+ MOVL SI, xenstart(SB) /* set global to start_info_t */
+ MOVL $0, AX /* clear EFLAGS */
+ PUSHL AX
+ POPFL
+ CALL mmumapcpu0(SB) /* make mapping before using stack */
+ MOVL $(MACHADDR+MACHSIZE-4), SP /* set stack */
+ CALL main(SB)
+
+/*
+ * Park a processor. Should never fall through a return from main to here,
+ * should only be called by application processors when shutting down.
+ */
+TEXT idle(SB), $0
+_idle:
+ STI
+ HLT
+ JMP _idle
+
+/*
+ * Read/write various system registers.
+ * CR4 and the 'model specific registers' should only be read/written
+ * after it has been determined the processor supports them
+ */
+TEXT lgdt(SB), $0 /* GDTR - global descriptor table */
+ MOVL gdtptr+0(FP), AX
+ MOVL (AX), GDTR
+ RET
+
+TEXT lidt(SB), $0 /* IDTR - interrupt descriptor table */
+ MOVL idtptr+0(FP), AX
+ MOVL (AX), IDTR
+ RET
+
+TEXT ltr(SB), $0 /* TR - task register */
+ MOVL tptr+0(FP), AX
+ MOVW AX, TASK
+ RET
+
+TEXT rtsr(SB), $0
+ MOVW TASK, AX
+ RET
+
+TEXT _cycles(SB), $0 /* time stamp counter; cycles since power up */
+ RDTSC
+ MOVL vlong+0(FP), CX /* &vlong */
+ MOVL AX, 0(CX) /* lo */
+ MOVL DX, 4(CX) /* hi */
+ RET
+
+TEXT rdmsr(SB), $0 /* model-specific register */
+ MOVL index+0(FP), CX
+ RDMSR
+ MOVL vlong+4(FP), CX /* &vlong */
+ MOVL AX, 0(CX) /* lo */
+ MOVL DX, 4(CX) /* hi */
+ RET
+
+TEXT wrmsr(SB), $0
+ MOVL index+0(FP), CX
+ MOVL lo+4(FP), AX
+ MOVL hi+8(FP), DX
+/* Xen doesn't let us do this
+ WRMSR
+ */
+ RET
+
+/*
+ * Try to determine the CPU type which requires fiddling with EFLAGS.
+ * If the Id bit can be toggled then the CPUID instruction can be used
+ * to determine CPU identity and features. First have to check if it's
+ * a 386 (Ac bit can't be set). If it's not a 386 and the Id bit can't be
+ * toggled then it's an older 486 of some kind.
+ *
+ * cpuid(fun, regs[4]);
+ */
+TEXT cpuid(SB), $0
+ MOVL $0x240000, AX
+ PUSHL AX
+ POPFL /* set Id|Ac */
+ PUSHFL
+ POPL BX /* retrieve value */
+ MOVL $0, AX
+ PUSHL AX
+ POPFL /* clear Id|Ac, EFLAGS initialised */
+ PUSHFL
+ POPL AX /* retrieve value */
+ XORL BX, AX
+ TESTL $0x040000, AX /* Ac */
+ JZ _cpu386 /* can't set this bit on 386 */
+ TESTL $0x200000, AX /* Id */
+ JZ _cpu486 /* can't toggle this bit on some 486 */
+ MOVL fn+0(FP), AX
+ CPUID
+ JMP _cpuid
+_cpu486:
+ MOVL $0x400, AX
+ JMP _maybezapax
+_cpu386:
+ MOVL $0x300, AX
+_maybezapax:
+ CMPL fn+0(FP), $1
+ JE _zaprest
+ XORL AX, AX
+_zaprest:
+ XORL BX, BX
+ XORL CX, CX
+ XORL DX, DX
+_cpuid:
+ MOVL regs+4(FP), BP
+ MOVL AX, 0(BP)
+ MOVL BX, 4(BP)
+ MOVL CX, 8(BP)
+ MOVL DX, 12(BP)
+ RET
+
+/*
+ * Floating point.
+ * Note: the encodings for the FCLEX, FINIT, FSAVE, FSTCW, FSENV and FSTSW
+ * instructions do NOT have the WAIT prefix byte (i.e. they act like their
+ * FNxxx variations) so WAIT instructions must be explicitly placed in the
+ * code as necessary.
+ */
+#define FPOFF(l) ;\
+ MOVL CR0, AX ;\
+ ANDL $0xC, AX /* EM, TS */ ;\
+ CMPL AX, $0x8 ;\
+ JEQ l ;\
+ WAIT ;\
+l: ;\
+ MOVL CR0, AX ;\
+ ANDL $~0x4, AX /* EM=0 */ ;\
+ ORL $0x28, AX /* NE=1, TS=1 */ ;\
+ MOVL AX, CR0
+
+#define FPON ;\
+ MOVL CR0, AX ;\
+ ANDL $~0xC, AX /* EM=0, TS=0 */ ;\
+ MOVL AX, CR0
+
+TEXT fpoff(SB), $0 /* disable */
+ FPOFF(l1)
+ RET
+
+TEXT fpinit(SB), $0 /* enable and init */
+ FPON
+ FINIT
+ WAIT
+ /* setfcr(FPPDBL|FPRNR|FPINVAL|FPZDIV|FPOVFL) */
+ /* note that low 6 bits are masks, not enables, on this chip */
+ PUSHW $0x0232
+ FLDCW 0(SP)
+ POPW AX
+ WAIT
+ RET
+
+TEXT fpx87save(SB), $0 /* save state and disable */
+ MOVL p+0(FP), AX
+ FSAVE 0(AX) /* no WAIT */
+ FPOFF(l2)
+ RET
+
+TEXT fpx87restore(SB), $0 /* enable and restore state */
+ FPON
+ MOVL p+0(FP), AX
+ FRSTOR 0(AX)
+ WAIT
+ RET
+
+TEXT fpstatus(SB), $0 /* get floating point status */
+ FSTSW AX
+ RET
+
+TEXT fpenv(SB), $0 /* save state without waiting */
+ MOVL p+0(FP), AX
+ FSTENV 0(AX)
+ RET
+
+TEXT fpclear(SB), $0 /* clear pending exceptions */
+ FPON
+ FCLEX /* no WAIT */
+ FPOFF(l3)
+ RET
+
+/*
+ * Test-And-Set
+ */
+TEXT tas(SB), $0
+ MOVL $0xDEADDEAD, AX
+ MOVL lock+0(FP), BX
+ XCHGL AX, (BX) /* lock->key */
+ RET
+
+TEXT _xinc(SB), $0 /* void _xinc(long*); */
+ MOVL l+0(FP), AX
+ LOCK; INCL 0(AX)
+ RET
+
+TEXT _xdec(SB), $0 /* long _xdec(long*); */
+ MOVL l+0(FP), BX
+ XORL AX, AX
+ LOCK; DECL 0(BX)
+ JLT _xdeclt
+ JGT _xdecgt
+ RET
+_xdecgt:
+ INCL AX
+ RET
+_xdeclt:
+ DECL AX
+ RET
+
+TEXT getstack(SB), $0
+ MOVL SP, AX
+	RET
+
+TEXT mb386(SB), $0
+ POPL AX /* return PC */
+ PUSHFL
+ PUSHL CS
+ PUSHL AX
+ IRETL
+
+TEXT mb586(SB), $0
+ XORL AX, AX
+ CPUID
+ RET
+
+TEXT sfence(SB), $0
+ BYTE $0x0f
+ BYTE $0xae
+ BYTE $0xf8
+ RET
+
+TEXT lfence(SB), $0
+ BYTE $0x0f
+ BYTE $0xae
+ BYTE $0xe8
+ RET
+
+TEXT mfence(SB), $0
+ BYTE $0x0f
+ BYTE $0xae
+ BYTE $0xf0
+ RET
+
+TEXT xchgw(SB), $0
+ MOVL v+4(FP), AX
+ MOVL p+0(FP), BX
+ XCHGW AX, (BX)
+ RET
+
+TEXT xchgb(SB), $0
+ MOVL v+4(FP), AX
+ MOVL p+0(FP), BX
+ XCHGB AX, (BX)
+ RET
+
+TEXT xchgl(SB), $0
+ MOVL v+4(FP), AX
+ MOVL p+0(FP), BX
+ XCHGL AX, (BX)
+ RET
+
+/* Return the position of the first bit set. Undefined if zero. */
+TEXT ffs(SB), $0
+ MOVL v+0(FP), AX
+ BSFL
+ RET
+
+TEXT cmpswap486(SB), $0
+ MOVL addr+0(FP), BX
+ MOVL old+4(FP), AX
+ MOVL new+8(FP), CX
+ LOCK
+ BYTE $0x0F; BYTE $0xB1; BYTE $0x0B /* CMPXCHGL CX, (BX) */
+ JNZ didnt
+ MOVL $1, AX
+ RET
+didnt:
+ XORL AX,AX
+ RET
+
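+/*
+ * mul64fract(uvlong *r, uvlong a, uvlong b)
+ * Multiply two unsigned 64-bit numbers and keep the middle 64 bits of
+ * the 128-bit product: *r = (a*b) >> 32, truncating the bottom 32 bits
+ * (only the high half of a0*b0 enters the sum).
+ */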
+TEXT mul64fract(SB), $0
+ MOVL r+0(FP), CX
+ XORL BX, BX /* BX = 0 */
+
+ MOVL a+8(FP), AX
+ MULL b+16(FP) /* a1*b1 */
+ MOVL AX, 4(CX) /* r2 = lo(a1*b1) */
+
+ MOVL a+8(FP), AX
+ MULL b+12(FP) /* a1*b0 */
+ MOVL AX, 0(CX) /* r1 = lo(a1*b0) */
+ ADDL DX, 4(CX) /* r2 += hi(a1*b0) */
+
+ MOVL a+4(FP), AX
+ MULL b+16(FP) /* a0*b1 */
+ ADDL AX, 0(CX) /* r1 += lo(a0*b1) */
+ ADCL DX, 4(CX) /* r2 += hi(a0*b1) + carry */
+
+ MOVL a+4(FP), AX
+ MULL b+12(FP) /* a0*b0 */
+ ADDL DX, 0(CX) /* r1 += hi(a0*b0) */
+ ADCL BX, 4(CX) /* r2 += carry */
+ RET
+
+/*
+ * label consists of a stack pointer and a PC
+ */
+TEXT gotolabel(SB), $0
+ MOVL label+0(FP), AX
+ MOVL 0(AX), SP /* restore sp */
+ MOVL 4(AX), AX /* put return pc on the stack */
+ MOVL AX, 0(SP)
+ MOVL $1, AX /* return 1 */
+ RET
+
+TEXT setlabel(SB), $0
+ MOVL label+0(FP), AX
+ MOVL SP, 0(AX) /* store sp */
+ MOVL 0(SP), BX /* store return pc */
+ MOVL BX, 4(AX)
+ MOVL $0, AX /* return 0 */
+ RET
+
+TEXT mwait(SB), $0
+ MOVL addr+0(FP), AX
+ MOVL (AX), CX
+ ORL CX, CX
+ JNZ _mwaitdone
+ XORL DX, DX
+ BYTE $0x0f; BYTE $0x01; BYTE $0xc8 /* MONITOR */
+ MOVL (AX), CX
+ ORL CX, CX
+ JNZ _mwaitdone
+ XORL AX, AX
+ BYTE $0x0f; BYTE $0x01; BYTE $0xc9 /* MWAIT */
+_mwaitdone:
+ RET
+
+/*
+ * Interrupt/exception handling.
+ * Each entry in the vector table calls either _strayintr or _strayintrx depending
+ * on whether an error code has been automatically pushed onto the stack
+ * (_strayintrx) or not, in which case a dummy entry must be pushed before retrieving
+ * the trap type from the vector table entry and placing it on the stack as part
+ * of the Ureg structure.
+ * Exceptions to this are the syscall vector and the page fault
+ * vector. Syscalls are dispatched separately. Page faults
+ * have to take care of the extra cr2 parameter that xen places
+ * at the top of the stack.
+ * The size of each entry in the vector table (6 bytes) is known in trapinit().
+ */
+TEXT _strayintr(SB), $0
+ PUSHL AX /* save AX */
+ MOVL 4(SP), AX /* return PC from vectortable(SB) */
+ JMP intrcommon
+
+TEXT _strayintrx(SB), $0
+ XCHGL AX, (SP) /* swap AX with vectortable CALL PC */
+intrcommon:
+ PUSHL DS /* save DS */
+ PUSHL $(KDSEL)
+ POPL DS /* fix up DS */
+ MOVBLZX (AX), AX /* trap type -> AX */
+ XCHGL AX, 4(SP) /* exchange trap type with saved AX */
+
+ PUSHL ES /* save ES */
+ PUSHL $(KDSEL)
+ POPL ES /* fix up ES */
+
+ PUSHL FS /* save the rest of the Ureg struct */
+ PUSHL GS
+ PUSHAL
+
+ PUSHL SP /* Ureg* argument to trap */
+ CALL trap(SB)
+
+TEXT forkret(SB), $0
+ POPL AX
+ POPAL
+ POPL GS
+ POPL FS
+ POPL ES
+ POPL DS
+ ADDL $8, SP /* pop error code and trap type */
+ IRETL
+
+TEXT vectortable(SB), $0
+ CALL _strayintr(SB); BYTE $0x00 /* divide error */
+ CALL _strayintr(SB); BYTE $0x01 /* debug exception */
+ CALL _strayintr(SB); BYTE $0x02 /* NMI interrupt */
+ CALL _strayintr(SB); BYTE $0x03 /* breakpoint */
+ CALL _strayintr(SB); BYTE $0x04 /* overflow */
+ CALL _strayintr(SB); BYTE $0x05 /* bound */
+ CALL _strayintr(SB); BYTE $0x06 /* invalid opcode */
+ CALL _strayintr(SB); BYTE $0x07 /* no coprocessor available */
+ CALL _strayintrx(SB); BYTE $0x08 /* double fault */
+ CALL _strayintr(SB); BYTE $0x09 /* coprocessor segment overflow */
+ CALL _strayintrx(SB); BYTE $0x0A /* invalid TSS */
+ CALL _strayintrx(SB); BYTE $0x0B /* segment not available */
+ CALL _strayintrx(SB); BYTE $0x0C /* stack exception */
+ CALL _strayintrx(SB); BYTE $0x0D /* general protection error */
+ CALL _strayintrx(SB); BYTE $0x0E /* page fault */
+ CALL _strayintr(SB); BYTE $0x0F /* */
+ CALL _strayintr(SB); BYTE $0x10 /* coprocessor error */
+ CALL _strayintrx(SB); BYTE $0x11 /* alignment check */
+ CALL _strayintr(SB); BYTE $0x12 /* machine check */
+ CALL _strayintr(SB); BYTE $0x13
+ CALL _strayintr(SB); BYTE $0x14
+ CALL _strayintr(SB); BYTE $0x15
+ CALL _strayintr(SB); BYTE $0x16
+ CALL _strayintr(SB); BYTE $0x17
+ CALL _strayintr(SB); BYTE $0x18
+ CALL _strayintr(SB); BYTE $0x19
+ CALL _strayintr(SB); BYTE $0x1A
+ CALL _strayintr(SB); BYTE $0x1B
+ CALL _strayintr(SB); BYTE $0x1C
+ CALL _strayintr(SB); BYTE $0x1D
+ CALL _strayintr(SB); BYTE $0x1E
+ CALL _strayintr(SB); BYTE $0x1F
+ CALL _strayintr(SB); BYTE $0x20 /* VectorLAPIC */
+ CALL _strayintr(SB); BYTE $0x21
+ CALL _strayintr(SB); BYTE $0x22
+ CALL _strayintr(SB); BYTE $0x23
+ CALL _strayintr(SB); BYTE $0x24
+ CALL _strayintr(SB); BYTE $0x25
+ CALL _strayintr(SB); BYTE $0x26
+ CALL _strayintr(SB); BYTE $0x27
+ CALL _strayintr(SB); BYTE $0x28
+ CALL _strayintr(SB); BYTE $0x29
+ CALL _strayintr(SB); BYTE $0x2A
+ CALL _strayintr(SB); BYTE $0x2B
+ CALL _strayintr(SB); BYTE $0x2C
+ CALL _strayintr(SB); BYTE $0x2D
+ CALL _strayintr(SB); BYTE $0x2E
+ CALL _strayintr(SB); BYTE $0x2F
+ CALL _strayintr(SB); BYTE $0x30
+ CALL _strayintr(SB); BYTE $0x31
+ CALL _strayintr(SB); BYTE $0x32
+ CALL _strayintr(SB); BYTE $0x33
+ CALL _strayintr(SB); BYTE $0x34
+ CALL _strayintr(SB); BYTE $0x35
+ CALL _strayintr(SB); BYTE $0x36
+ CALL _strayintr(SB); BYTE $0x37
+ CALL _strayintr(SB); BYTE $0x38
+ CALL _strayintr(SB); BYTE $0x39
+ CALL _strayintr(SB); BYTE $0x3A
+ CALL _strayintr(SB); BYTE $0x3B
+ CALL _strayintr(SB); BYTE $0x3C
+ CALL _strayintr(SB); BYTE $0x3D
+ CALL _strayintr(SB); BYTE $0x3E
+ CALL _strayintr(SB); BYTE $0x3F
+ CALL _syscallintr(SB); BYTE $0x40 /* VectorSYSCALL */
+ CALL _strayintr(SB); BYTE $0x41
+ CALL _strayintr(SB); BYTE $0x42
+ CALL _strayintr(SB); BYTE $0x43
+ CALL _strayintr(SB); BYTE $0x44
+ CALL _strayintr(SB); BYTE $0x45
+ CALL _strayintr(SB); BYTE $0x46
+ CALL _strayintr(SB); BYTE $0x47
+ CALL _strayintr(SB); BYTE $0x48
+ CALL _strayintr(SB); BYTE $0x49
+ CALL _strayintr(SB); BYTE $0x4A
+ CALL _strayintr(SB); BYTE $0x4B
+ CALL _strayintr(SB); BYTE $0x4C
+ CALL _strayintr(SB); BYTE $0x4D
+ CALL _strayintr(SB); BYTE $0x4E
+ CALL _strayintr(SB); BYTE $0x4F
+ CALL _strayintr(SB); BYTE $0x50
+ CALL _strayintr(SB); BYTE $0x51
+ CALL _strayintr(SB); BYTE $0x52
+ CALL _strayintr(SB); BYTE $0x53
+ CALL _strayintr(SB); BYTE $0x54
+ CALL _strayintr(SB); BYTE $0x55
+ CALL _strayintr(SB); BYTE $0x56
+ CALL _strayintr(SB); BYTE $0x57
+ CALL _strayintr(SB); BYTE $0x58
+ CALL _strayintr(SB); BYTE $0x59
+ CALL _strayintr(SB); BYTE $0x5A
+ CALL _strayintr(SB); BYTE $0x5B
+ CALL _strayintr(SB); BYTE $0x5C
+ CALL _strayintr(SB); BYTE $0x5D
+ CALL _strayintr(SB); BYTE $0x5E
+ CALL _strayintr(SB); BYTE $0x5F
+ CALL _strayintr(SB); BYTE $0x60
+ CALL _strayintr(SB); BYTE $0x61
+ CALL _strayintr(SB); BYTE $0x62
+ CALL _strayintr(SB); BYTE $0x63
+ CALL _strayintr(SB); BYTE $0x64
+ CALL _strayintr(SB); BYTE $0x65
+ CALL _strayintr(SB); BYTE $0x66
+ CALL _strayintr(SB); BYTE $0x67
+ CALL _strayintr(SB); BYTE $0x68
+ CALL _strayintr(SB); BYTE $0x69
+ CALL _strayintr(SB); BYTE $0x6A
+ CALL _strayintr(SB); BYTE $0x6B
+ CALL _strayintr(SB); BYTE $0x6C
+ CALL _strayintr(SB); BYTE $0x6D
+ CALL _strayintr(SB); BYTE $0x6E
+ CALL _strayintr(SB); BYTE $0x6F
+ CALL _strayintr(SB); BYTE $0x70
+ CALL _strayintr(SB); BYTE $0x71
+ CALL _strayintr(SB); BYTE $0x72
+ CALL _strayintr(SB); BYTE $0x73
+ CALL _strayintr(SB); BYTE $0x74
+ CALL _strayintr(SB); BYTE $0x75
+ CALL _strayintr(SB); BYTE $0x76
+ CALL _strayintr(SB); BYTE $0x77
+ CALL _strayintr(SB); BYTE $0x78
+ CALL _strayintr(SB); BYTE $0x79
+ CALL _strayintr(SB); BYTE $0x7A
+ CALL _strayintr(SB); BYTE $0x7B
+ CALL _strayintr(SB); BYTE $0x7C
+ CALL _strayintr(SB); BYTE $0x7D
+ CALL _strayintr(SB); BYTE $0x7E
+ CALL _strayintr(SB); BYTE $0x7F
+ CALL _strayintr(SB); BYTE $0x80 /* Vector[A]PIC */
+ CALL _strayintr(SB); BYTE $0x81
+ CALL _strayintr(SB); BYTE $0x82
+ CALL _strayintr(SB); BYTE $0x83
+ CALL _strayintr(SB); BYTE $0x84
+ CALL _strayintr(SB); BYTE $0x85
+ CALL _strayintr(SB); BYTE $0x86
+ CALL _strayintr(SB); BYTE $0x87
+ CALL _strayintr(SB); BYTE $0x88
+ CALL _strayintr(SB); BYTE $0x89
+ CALL _strayintr(SB); BYTE $0x8A
+ CALL _strayintr(SB); BYTE $0x8B
+ CALL _strayintr(SB); BYTE $0x8C
+ CALL _strayintr(SB); BYTE $0x8D
+ CALL _strayintr(SB); BYTE $0x8E
+ CALL _strayintr(SB); BYTE $0x8F
+ CALL _strayintr(SB); BYTE $0x90
+ CALL _strayintr(SB); BYTE $0x91
+ CALL _strayintr(SB); BYTE $0x92
+ CALL _strayintr(SB); BYTE $0x93
+ CALL _strayintr(SB); BYTE $0x94
+ CALL _strayintr(SB); BYTE $0x95
+ CALL _strayintr(SB); BYTE $0x96
+ CALL _strayintr(SB); BYTE $0x97
+ CALL _strayintr(SB); BYTE $0x98
+ CALL _strayintr(SB); BYTE $0x99
+ CALL _strayintr(SB); BYTE $0x9A
+ CALL _strayintr(SB); BYTE $0x9B
+ CALL _strayintr(SB); BYTE $0x9C
+ CALL _strayintr(SB); BYTE $0x9D
+ CALL _strayintr(SB); BYTE $0x9E
+ CALL _strayintr(SB); BYTE $0x9F
+ CALL _strayintr(SB); BYTE $0xA0
+ CALL _strayintr(SB); BYTE $0xA1
+ CALL _strayintr(SB); BYTE $0xA2
+ CALL _strayintr(SB); BYTE $0xA3
+ CALL _strayintr(SB); BYTE $0xA4
+ CALL _strayintr(SB); BYTE $0xA5
+ CALL _strayintr(SB); BYTE $0xA6
+ CALL _strayintr(SB); BYTE $0xA7
+ CALL _strayintr(SB); BYTE $0xA8
+ CALL _strayintr(SB); BYTE $0xA9
+ CALL _strayintr(SB); BYTE $0xAA
+ CALL _strayintr(SB); BYTE $0xAB
+ CALL _strayintr(SB); BYTE $0xAC
+ CALL _strayintr(SB); BYTE $0xAD
+ CALL _strayintr(SB); BYTE $0xAE
+ CALL _strayintr(SB); BYTE $0xAF
+ CALL _strayintr(SB); BYTE $0xB0
+ CALL _strayintr(SB); BYTE $0xB1
+ CALL _strayintr(SB); BYTE $0xB2
+ CALL _strayintr(SB); BYTE $0xB3
+ CALL _strayintr(SB); BYTE $0xB4
+ CALL _strayintr(SB); BYTE $0xB5
+ CALL _strayintr(SB); BYTE $0xB6
+ CALL _strayintr(SB); BYTE $0xB7
+ CALL _strayintr(SB); BYTE $0xB8
+ CALL _strayintr(SB); BYTE $0xB9
+ CALL _strayintr(SB); BYTE $0xBA
+ CALL _strayintr(SB); BYTE $0xBB
+ CALL _strayintr(SB); BYTE $0xBC
+ CALL _strayintr(SB); BYTE $0xBD
+ CALL _strayintr(SB); BYTE $0xBE
+ CALL _strayintr(SB); BYTE $0xBF
+ CALL _strayintr(SB); BYTE $0xC0
+ CALL _strayintr(SB); BYTE $0xC1
+ CALL _strayintr(SB); BYTE $0xC2
+ CALL _strayintr(SB); BYTE $0xC3
+ CALL _strayintr(SB); BYTE $0xC4
+ CALL _strayintr(SB); BYTE $0xC5
+ CALL _strayintr(SB); BYTE $0xC6
+ CALL _strayintr(SB); BYTE $0xC7
+ CALL _strayintr(SB); BYTE $0xC8
+ CALL _strayintr(SB); BYTE $0xC9
+ CALL _strayintr(SB); BYTE $0xCA
+ CALL _strayintr(SB); BYTE $0xCB
+ CALL _strayintr(SB); BYTE $0xCC
+ CALL _strayintr(SB); BYTE $0xCD
+ CALL _strayintr(SB); BYTE $0xCE
+ CALL _strayintr(SB); BYTE $0xCF
+ CALL _strayintr(SB); BYTE $0xD0
+ CALL _strayintr(SB); BYTE $0xD1
+ CALL _strayintr(SB); BYTE $0xD2
+ CALL _strayintr(SB); BYTE $0xD3
+ CALL _strayintr(SB); BYTE $0xD4
+ CALL _strayintr(SB); BYTE $0xD5
+ CALL _strayintr(SB); BYTE $0xD6
+ CALL _strayintr(SB); BYTE $0xD7
+ CALL _strayintr(SB); BYTE $0xD8
+ CALL _strayintr(SB); BYTE $0xD9
+ CALL _strayintr(SB); BYTE $0xDA
+ CALL _strayintr(SB); BYTE $0xDB
+ CALL _strayintr(SB); BYTE $0xDC
+ CALL _strayintr(SB); BYTE $0xDD
+ CALL _strayintr(SB); BYTE $0xDE
+ CALL _strayintr(SB); BYTE $0xDF
+ CALL _strayintr(SB); BYTE $0xE0
+ CALL _strayintr(SB); BYTE $0xE1
+ CALL _strayintr(SB); BYTE $0xE2
+ CALL _strayintr(SB); BYTE $0xE3
+ CALL _strayintr(SB); BYTE $0xE4
+ CALL _strayintr(SB); BYTE $0xE5
+ CALL _strayintr(SB); BYTE $0xE6
+ CALL _strayintr(SB); BYTE $0xE7
+ CALL _strayintr(SB); BYTE $0xE8
+ CALL _strayintr(SB); BYTE $0xE9
+ CALL _strayintr(SB); BYTE $0xEA
+ CALL _strayintr(SB); BYTE $0xEB
+ CALL _strayintr(SB); BYTE $0xEC
+ CALL _strayintr(SB); BYTE $0xED
+ CALL _strayintr(SB); BYTE $0xEE
+ CALL _strayintr(SB); BYTE $0xEF
+ CALL _strayintr(SB); BYTE $0xF0
+ CALL _strayintr(SB); BYTE $0xF1
+ CALL _strayintr(SB); BYTE $0xF2
+ CALL _strayintr(SB); BYTE $0xF3
+ CALL _strayintr(SB); BYTE $0xF4
+ CALL _strayintr(SB); BYTE $0xF5
+ CALL _strayintr(SB); BYTE $0xF6
+ CALL _strayintr(SB); BYTE $0xF7
+ CALL _strayintr(SB); BYTE $0xF8
+ CALL _strayintr(SB); BYTE $0xF9
+ CALL _strayintr(SB); BYTE $0xFA
+ CALL _strayintr(SB); BYTE $0xFB
+ CALL _strayintr(SB); BYTE $0xFC
+ CALL _strayintr(SB); BYTE $0xFD
+ CALL _strayintr(SB); BYTE $0xFE
+ CALL _strayintr(SB); BYTE $0xFF
--- /dev/null
+++ b/sys/src/9/xen/main.c
@@ -1,0 +1,801 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "init.h"
+#include "pool.h"
+#include "reboot.h"
+#include <tos.h>
+
+Mach *m;
+
+#define BOOTARGS (xenstart->cmd_line)
+#define BOOTARGSLEN (sizeof xenstart->cmd_line)
+#define MAXCONF 64
+
+enum {
+ /* space for syscall args, return PC, top-of-stack struct */
+ Ustkheadroom = sizeof(Sargs) + sizeof(uintptr) + sizeof(Tos),
+};
+
+char bootdisk[KNAMELEN];
+Conf conf;
+char *confname[MAXCONF];
+char *confval[MAXCONF];
+int nconf;
+uchar *sp; /* user stack of init proc */
+int idle_spin;
+
+static void
+options(void)
+{
+ long i, n;
+ char *cp, *line[MAXCONF], *p, *q;
+
+	/*
+	 * parse configuration args in plan9.ini format,
+	 * passed by Xen in the domain's command line
+	 */
+	cp = BOOTARGS;	/* the Xen kernel command line */
+ cp[BOOTARGSLEN-1] = 0;
+
+ /*
+ * Strip out '\r', change '\t' -> ' '.
+ */
+ p = cp;
+ for(q = cp; *q; q++){
+ if(*q == '\r')
+ continue;
+ if(*q == '\t')
+ *q = ' ';
+ *p++ = *q;
+ }
+ *p = 0;
+
+ n = getfields(cp, line, MAXCONF, 1, "\n");
+ for(i = 0; i < n; i++){
+ if(*line[i] == '#')
+ continue;
+ cp = strchr(line[i], '=');
+ if(cp == nil)
+ continue;
+ *cp++ = '\0';
+ confname[nconf] = line[i];
+ confval[nconf] = cp;
+ nconf++;
+ }
+}
+
+void
+main(void)
+{
+ mach0init();
+ options();
+ quotefmtinstall();
+ xenconsinit();
+
+ //consdebug = rdb;
+ print("\nPlan 9 (%s)\n", xenstart->magic);
+
+ cpuidentify();
+ // meminit() is not for us
+ confinit();
+ archinit();
+ xinit();
+ trapinit();
+ printinit();
+ cpuidprint();
+ mmuinit();
+ if(arch->intrinit) /* launches other processors on an mp */
+ arch->intrinit();
+ timersinit();
+ mathinit();
+ kbdenable();
+ xengrantinit();
+ if(arch->clockenable)
+ arch->clockenable();
+ procinit0();
+ initseg();
+
+ links();
+// conf.monitor = 1;
+ chandevreset();
+ pageinit();
+
+ swapinit();
+ userinit();
+ active.thunderbirdsarego = 1;
+ schedinit();
+}
+
+void
+mach0init(void)
+{
+ m = (Mach*)MACHADDR;
+ m->machno = 0;
+ conf.nmach = 1;
+ MACHP(0) = (Mach*)CPU0MACH;
+ m->pdb = (ulong*)xenstart->pt_base;
+#ifdef NOT
+ m->gdt = (Segdesc*)CPU0GDT;
+#endif
+
+ machinit();
+
+ active.machs = 1;
+ active.exiting = 0;
+}
+
+void
+machinit(void)
+{
+ int machno;
+ ulong *pdb;
+ Segdesc *gdt;
+
+ machno = m->machno;
+ pdb = m->pdb;
+ gdt = m->gdt;
+ memset(m, 0, sizeof(Mach));
+ m->machno = machno;
+ m->pdb = pdb;
+ m->gdt = gdt;
+ m->perf.period = 1;
+
+ /*
+ * For polled uart output at boot, need
+ * a default delay constant. 100000 should
+ * be enough for a while. Cpuidentify will
+ * calculate the real value later.
+ */
+ m->loopconst = 100000;
+ m->cpumhz = 1000; // XXX!
+
+ HYPERVISOR_shared_info = (shared_info_t*)mmumapframe(XENSHARED, (xenstart->shared_info)>>PGSHIFT);
+
+ // XXX m->shared = &HYPERVISOR_shared_info->vcpu_data[m->machno];
+}
+
+void
+init0(void)
+{
+ int i;
+ char buf[2*KNAMELEN];
+
+ up->nerrlab = 0;
+
+ spllo();
+
+ /*
+ * These are o.k. because rootinit is null.
+ * Then early kproc's will have a root and dot.
+ */
+ up->slash = namec("#/", Atodir, 0, 0);
+ pathclose(up->slash->path);
+ up->slash->path = newpath("/");
+ up->dot = cclone(up->slash);
+
+ chandevinit();
+
+ if(!waserror()){
+ snprint(buf, sizeof(buf), "%s %s", arch->id, conffile);
+ ksetenv("terminal", buf, 0);
+ ksetenv("cputype", "386", 0);
+ if(cpuserver)
+ ksetenv("service", "cpu", 0);
+ else
+ ksetenv("service", "terminal", 0);
+ ksetenv("readparts", "1", 0);
+ for(i = 0; i < nconf; i++){
+ if(confname[i][0] != '*')
+ ksetenv(confname[i], confval[i], 0);
+ ksetenv(confname[i], confval[i], 1);
+ }
+ poperror();
+ }
+
+ kproc("alarm", alarmkproc, 0);
+ touser(sp);
+}
+
+void
+userinit(void)
+{
+ Proc *p;
+ Segment *s;
+ KMap *k;
+ Page *pg;
+
+ p = newproc();
+ p->pgrp = newpgrp();
+ p->egrp = smalloc(sizeof(Egrp));
+ p->egrp->ref = 1;
+ p->fgrp = dupfgrp(nil);
+ p->rgrp = newrgrp();
+ p->procmode = 0640;
+
+ kstrdup(&eve, "");
+ kstrdup(&p->text, "*init*");
+ kstrdup(&p->user, eve);
+
+ p->fpstate = FPinit;
+ fpoff();
+
+ /*
+ * Kernel Stack
+ *
+ * N.B. make sure there's enough space for syscall to check
+ * for valid args and
+ * 4 bytes for gotolabel's return PC
+ */
+ p->sched.pc = (ulong)init0;
+ p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Sargs)+BY2WD);
+
+ /*
+ * User Stack
+ */
+ s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG);
+ p->seg[SSEG] = s;
+ pg = newpage(1, 0, USTKTOP-BY2PG);
+ segpage(s, pg);
+ k = kmap(pg);
+ bootargs(VA(k));
+ kunmap(k);
+
+ /*
+ * Text
+ */
+ s = newseg(SG_TEXT, UTZERO, 1);
+ s->flushme++;
+ p->seg[TSEG] = s;
+ pg = newpage(1, 0, UTZERO);
+ memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));
+ segpage(s, pg);
+ k = kmap(s->map[0]->pages[0]);
+ memmove((ulong*)VA(k), initcode, sizeof initcode);
+ kunmap(k);
+ ready(p);
+}
+
+uchar *
+pusharg(char *p)
+{
+ int n;
+
+ n = strlen(p)+1;
+ sp -= n;
+ memmove(sp, p, n);
+ return sp;
+}
+
+void
+bootargs(ulong base)
+{
+ int i, ac;
+ uchar *av[32];
+ uchar **lsp;
+
+ sp = (uchar*)base + BY2PG - Ustkheadroom;
+
+ ac = 0;
+ av[ac++] = pusharg("/386/9dos");
+ av[ac++] = pusharg("-D");
+
+ /* 4 byte word align stack */
+ sp = (uchar*)((ulong)sp & ~3);
+
+ /* build argc, argv on stack */
+ sp -= (ac+1)*sizeof(sp);
+ lsp = (uchar**)sp;
+ for(i = 0; i < ac; i++)
+ *lsp++ = av[i] + ((USTKTOP - BY2PG) - base);
+ *lsp = 0;
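+	/*
+	 * rebase sp from this kernel-mapped page to the user virtual
+	 * address of the stack, leaving a word for the return PC slot
+	 * so the argument vector lines up at entry
+	 */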
+ sp += (USTKTOP - BY2PG) - base - sizeof(ulong);
+}
+
+char*
+getconf(char *name)
+{
+ int i;
+
+ for(i = 0; i < nconf; i++)
+ if(cistrcmp(confname[i], name) == 0)
+ return confval[i];
+ return 0;
+}
+
+static void
+writeconf(void)
+{
+ char *p, *q;
+ int n;
+
+ p = getconfenv();
+
+ if(waserror()) {
+ free(p);
+ nexterror();
+ }
+
+ /* convert to name=value\n format */
+ for(q=p; *q; q++) {
+ q += strlen(q);
+ *q = '=';
+ q += strlen(q);
+ *q = '\n';
+ }
+ n = q - p + 1;
+ if(n >= BOOTARGSLEN)
+ error("kernel configuration too large");
+ memmove(BOOTARGS, p, n);
+ poperror();
+ free(p);
+}
+
+void
+confinit(void)
+{
+ char *p;
+ int i, userpcnt;
+ ulong kpages;
+
+ for(i = 0; i < nconf; i++)
+ print("%s=%s\n", confname[i], confval[i]);
+ /*
+ * all ram above xentop is free, but must be mappable
+ * to virt addrs less than VIRT_START.
+ */
+ kpages = PADDR(hypervisor_virt_start)>>PGSHIFT;
+ if(xenstart->nr_pages <= kpages)
+ kpages = xenstart->nr_pages;
+ else
+ print("Warning: Plan 9 / Xen limitation - "
+ "using only %lud of %lud available RAM pages\n",
+ kpages, xenstart->nr_pages);
+ xentop = PGROUND(PADDR(xentop));
+ conf.mem[0].npage = kpages - (xentop>>PGSHIFT);
+ conf.mem[0].base = xentop;
+
+ if(p = getconf("*kernelpercent"))
+ userpcnt = 100 - strtol(p, 0, 0);
+ else
+ userpcnt = 0;
+
+ conf.npage = 0;
+ for(i=0; i<nelem(conf.mem); i++)
+ conf.npage += conf.mem[i].npage;
+
+ conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
+ if(cpuserver)
+ conf.nproc *= 3;
+ if(conf.nproc > 2000)
+ conf.nproc = 2000;
+ conf.nimage = 200;
+ conf.nswap = conf.nproc*80;
+ conf.nswppo = 4096;
+
+ if(cpuserver) {
+ if(userpcnt < 10)
+ userpcnt = 70;
+ kpages = conf.npage - (conf.npage*userpcnt)/100;
+
+ /*
+ * Hack for the big boys. Only good while physmem < 4GB.
+ * Give the kernel fixed max + enough to allocate the
+ * page pool.
+	 * This is an overestimate as conf.upages < conf.npage.
+	 * The patch of nimage is a band-aid; scanning the whole
+	 * page list in imagereclaim just takes too long.
+ */
+ if(kpages > (64*MB + conf.npage*sizeof(Page))/BY2PG){
+ kpages = (64*MB + conf.npage*sizeof(Page))/BY2PG;
+ conf.nimage = 2000;
+ kpages += (conf.nproc*KSTACK)/BY2PG;
+ }
+ } else {
+ if(userpcnt < 10) {
+ if(conf.npage*BY2PG < 16*MB)
+ userpcnt = 40;
+ else
+ userpcnt = 60;
+ }
+ kpages = conf.npage - (conf.npage*userpcnt)/100;
+
+ /*
+ * Make sure terminals with low memory get at least
+ * 4MB on the first Image chunk allocation.
+ */
+ if(conf.npage*BY2PG < 16*MB)
+ imagmem->minarena = 4*1024*1024;
+ }
+
+ /*
+ * can't go past the end of virtual memory
+ * (ulong)-KZERO is 2^32 - KZERO
+ */
+ if(kpages > ((ulong)-KZERO)/BY2PG)
+ kpages = ((ulong)-KZERO)/BY2PG;
+
+ conf.upages = conf.npage - kpages;
+ conf.ialloc = (kpages/2)*BY2PG;
+
+ /*
+ * Guess how much is taken by the large permanent
+ * datastructures. Mntcache and Mntrpc are not accounted for
+ * (probably ~300KB).
+ */
+ kpages *= BY2PG;
+ kpages -= conf.upages*sizeof(Page)
+ + conf.nproc*sizeof(Proc)
+ + conf.nimage*sizeof(Image)
+ + conf.nswap
+ + conf.nswppo*sizeof(Page);
+ mainmem->maxsize = kpages;
+ if(!cpuserver){
+ /*
+ * give terminals lots of image memory, too; the dynamic
+ * allocation will balance the load properly, hopefully.
+ * be careful with 32-bit overflow.
+ */
+ imagmem->maxsize = kpages;
+ }
+}
+
+static char* mathmsg[] =
+{
+ nil, /* handled below */
+ "denormalized operand",
+ "division by zero",
+ "numeric overflow",
+ "numeric underflow",
+ "precision loss",
+};
+
+static void
+mathnote(void)
+{
+ int i;
+ ulong status;
+ char *msg, note[ERRMAX];
+
+ status = up->fpsave.status;
+
+ /*
+ * Some attention should probably be paid here to the
+ * exception masks and error summary.
+ */
+ msg = "unknown exception";
+ for(i = 1; i <= 5; i++){
+ if(!((1<<i) & status))
+ continue;
+ msg = mathmsg[i];
+ break;
+ }
+ if(status & 0x01){
+ if(status & 0x40){
+ if(status & 0x200)
+ msg = "stack overflow";
+ else
+ msg = "stack underflow";
+ }else
+ msg = "invalid operation";
+ }
+ snprint(note, sizeof note, "sys: fp: %s fppc=0x%lux status=0x%lux",
+ msg, up->fpsave.pc, status);
+ postnote(up, 1, note, NDebug);
+}
+
+/*
+ * math coprocessor error
+ */
+static void
+matherror(Ureg *ur, void*)
+{
+ /*
+ * a write cycle to port 0xF0 clears the interrupt latch attached
+ * to the error# line from the 387
+ */
+ if(!(m->cpuiddx & 0x01))
+ outb(0xF0, 0xFF);
+
+ /*
+ * save floating point state to check out error
+ */
+ fpenv(&up->fpsave);
+ mathnote();
+
+ if(ur->pc & KZERO)
+ panic("fp: status %ux fppc=0x%lux pc=0x%lux",
+ up->fpsave.status, up->fpsave.pc, ur->pc);
+}
+
+/*
+ * math coprocessor emulation fault
+ */
+static void
+mathemu(Ureg *ureg, void*)
+{
+ if(up->fpstate & FPillegal){
+ /* someone did floating point in a note handler */
+ postnote(up, 1, "sys: floating point in note handler", NDebug);
+ return;
+ }
+ switch(up->fpstate){
+ case FPinit:
+ fpinit();
+ up->fpstate = FPactive;
+ break;
+ case FPinactive:
+ /*
+ * Before restoring the state, check for any pending
+		 * exceptions; there's no way to restore the state without
+ * generating an unmasked exception.
+ * More attention should probably be paid here to the
+ * exception masks and error summary.
+ */
+ if((up->fpsave.status & ~up->fpsave.control) & 0x07F){
+ mathnote();
+ break;
+ }
+ fprestore(&up->fpsave);
+ up->fpstate = FPactive;
+ break;
+ case FPactive:
+ panic("math emu pid %ld %s pc 0x%lux",
+ up->pid, up->text, ureg->pc);
+ break;
+ }
+}
+
+/*
+ * math coprocessor segment overrun
+ */
+static void
+mathover(Ureg*, void*)
+{
+ pexit("math overrun", 0);
+}
+
+void
+mathinit(void)
+{
+ trapenable(VectorCERR, matherror, 0, "matherror");
+ //if(X86FAMILY(m->cpuidax) == 3)
+ // intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
+ trapenable(VectorCNA, mathemu, 0, "mathemu");
+ trapenable(VectorCSO, mathover, 0, "mathover");
+}
+
+/*
+ * set up floating point for a new process
+ */
+void
+procsetup(Proc*p)
+{
+ p->fpstate = FPinit;
+ fpoff();
+}
+
+void
+procfork(Proc *p)
+{
+ int s;
+
+ p->kentry = up->kentry;
+ p->pcycles = -p->kentry;
+
+ /* inherit user descriptors */
+ memmove(p->gdt, up->gdt, sizeof(p->gdt));
+
+ /* copy local descriptor table */
+ if(up->ldt != nil && up->nldt > 0){
+ p->ldt = smalloc(sizeof(Segdesc) * up->nldt);
+ memmove(p->ldt, up->ldt, sizeof(Segdesc) * up->nldt);
+ p->nldt = up->nldt;
+ }
+
+ /* save floating point state */
+ s = splhi();
+ switch(up->fpstate & ~FPillegal){
+ case FPactive:
+ fpsave(&up->fpsave);
+ up->fpstate = FPinactive;
+ case FPinactive:
+ p->fpsave = up->fpsave;
+ p->fpstate = FPinactive;
+ }
+ splx(s);
+}
+
+void
+procrestore(Proc *p)
+{
+ uvlong t;
+
+ if(p->kp)
+ return;
+ cycles(&t);
+ p->pcycles -= t;
+}
+
+/*
+ * Save the mach dependent part of the process state.
+ */
+void
+procsave(Proc *p)
+{
+ uvlong t;
+
+ cycles(&t);
+ p->pcycles += t;
+ if(p->fpstate == FPactive){
+ if(p->state == Moribund)
+ fpclear();
+ else{
+ /*
+ * Fpsave() stores without handling pending
+			 * unmasked exceptions. Postnote() can't be called
+ * here as sleep() already has up->rlock, so
+ * the handling of pending exceptions is delayed
+ * until the process runs again and generates an
+ * emulation fault to activate the FPU.
+ */
+ fpsave(&p->fpsave);
+ }
+ p->fpstate = FPinactive;
+ }
+
+ /*
+ * While this processor is in the scheduler, the process could run
+ * on another processor and exit, returning the page tables to
+ * the free list where they could be reallocated and overwritten.
+ * When this processor eventually has to get an entry from the
+ * trashed page tables it will crash.
+ *
+ * If there's only one processor, this can't happen.
+ * You might think it would be a win not to do this in that case,
+ * especially on VMware, but it turns out not to matter.
+ */
+ mmuflushtlb(0);
+}
+
+static void
+shutdown(int ispanic)
+{
+ int ms, once;
+
+ lock(&active);
+ if(ispanic)
+ active.ispanic = ispanic;
+ else if(m->machno == 0 && (active.machs & (1<<m->machno)) == 0)
+ active.ispanic = 0;
+ once = active.machs & (1<<m->machno);
+ active.machs &= ~(1<<m->machno);
+ active.exiting = 1;
+ unlock(&active);
+
+ if(once)
+ print("cpu%d: exiting\n", m->machno);
+ //spllo();
+ for(ms = 5*1000; ms > 0; ms -= TK2MS(2)){
+ delay(TK2MS(2));
+ if(active.machs == 0 && consactive() == 0)
+ break;
+ }
+
+ if(getconf("*debug"))
+ delay(5*60*1000);
+
+ if(active.ispanic){
+ if(!cpuserver)
+ for(;;)
+ halt();
+ delay(10000);
+ }else
+ delay(1000);
+}
+
+void
+reboot(void *entry, void *code, ulong size)
+{
+ void (*f)(ulong, ulong, ulong);
+ //ulong *pdb;
+
+ writeconf();
+
+ shutdown(0);
+
+ /*
+ * should be the only processor running now
+ */
+
+ print("shutting down...\n");
+ delay(200);
+
+ splhi();
+
+ /* turn off buffered serial console */
+ serialoq = nil;
+
+ /* shutdown devices */
+ chandevshutdown();
+
+ /* reboot(0, ...) on Xen causes domU shutdown */
+ if(entry == 0)
+ HYPERVISOR_shutdown(0);
+
+ /*
+ * Modify the machine page table to directly map the low 4MB of memory
+ * This allows the reboot code to turn off the page mapping
+ */
+ //pdb = m->pdb;
+ //pdb[PDX(0)] = pdb[PDX(KZERO)];
+ mmuflushtlb(0);
+
+ /* setup reboot trampoline function */
+ f = (void*)REBOOTADDR;
+ memmove(f, rebootcode, sizeof(rebootcode));
+
+ print("rebooting...\n");
+
+ /* off we go - never to return */
+ (*f)(PADDR(entry), PADDR(code), size);
+}
+
+
+void
+exit(int ispanic)
+{
+ shutdown(ispanic);
+ arch->reset();
+}
+
+int
+cistrcmp(char *a, char *b)
+{
+ int ac, bc;
+
+ for(;;){
+ ac = *a++;
+ bc = *b++;
+
+ if(ac >= 'A' && ac <= 'Z')
+ ac = 'a' + (ac - 'A');
+ if(bc >= 'A' && bc <= 'Z')
+ bc = 'a' + (bc - 'A');
+ ac -= bc;
+ if(ac)
+ return ac;
+ if(bc == 0)
+ break;
+ }
+ return 0;
+}
+
+int
+cistrncmp(char *a, char *b, int n)
+{
+ unsigned ac, bc;
+
+ while(n > 0){
+ ac = *a++;
+ bc = *b++;
+ n--;
+
+ if(ac >= 'A' && ac <= 'Z')
+ ac = 'a' + (ac - 'A');
+ if(bc >= 'A' && bc <= 'Z')
+ bc = 'a' + (bc - 'A');
+
+ ac -= bc;
+ if(ac)
+ return ac;
+ if(bc == 0)
+ break;
+ }
+
+ return 0;
+}
+
--- /dev/null
+++ b/sys/src/9/xen/mem.h
@@ -1,0 +1,160 @@
+/*
+ * Memory and machine-specific definitions. Used in C and assembler.
+ */
+
+#define MIN(a, b) ((a) < (b)? (a): (b))
+#define MAX(a, b) ((a) > (b)? (a): (b))
+
+/*
+ * Sizes
+ */
+#define BI2BY 8 /* bits per byte */
+#define BI2WD 32 /* bits per word */
+#define BY2WD 4 /* bytes per word */
+#define BY2V 8 /* bytes per double word */
+#define BY2PG 4096 /* bytes per page */
+#define WD2PG (BY2PG/BY2WD) /* words per page */
+#define PGSHIFT 12 /* log(BY2PG) */
+#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1))
+#define PGROUND(s) ROUND(s, BY2PG)
+#define BLOCKALIGN 8
+#define FPalign 16 /* required for FXSAVE */
+
+#define MAXMACH 8 /* max # cpus system can run */
+#define MAX_VIRT_CPUS MAXMACH
+#define KSTACK 4096 /* Size of kernel stack */
+
+/*
+ * Time
+ */
+#define HZ (100) /* clock frequency */
+#define MS2HZ (1000/HZ) /* millisec per clock tick */
+#define TK2SEC(t) ((t)/HZ) /* ticks to seconds */
+
+/*
+ * Fundamental addresses
+ */
+#define REBOOTADDR 0x00001000 /* reboot code - physical address */
+#define APBOOTSTRAP 0x80001000 /* AP bootstrap code */
+#define MACHADDR 0x80002000 /* as seen by current processor */
+#define CPU0MACH MACHADDR /* Mach for bootstrap processor */
+#define XENCONSOLE 0x80003000 /* xen console ring */
+#define XENSHARED 0x80004000 /* xen shared page */
+#define XENBUS 0x80005000 /* xenbus aka xenstore ring */
+#define XENGRANTTAB 0x80006000 /* grant table */
+
+#define MACHSIZE BY2PG
+
+/*
+ * Address spaces
+ *
+ * User is at 0-2GB
+ * Kernel is at 2GB-4GB
+ */
+#define UZERO 0 /* base of user address space */
+#define UTZERO (UZERO+BY2PG) /* first address in user text */
+#define UTROUND(t) ROUNDUP((t), BY2PG)
+#define KZERO 0x80000000 /* base of kernel address space */
+#define KTZERO 0x80010000 /* first address in kernel text */
+#define USTKTOP (KZERO-BY2PG) /* byte just beyond user stack */
+#define USTKSIZE (16*1024*1024) /* size of user stack */
+#define TSTKTOP (USTKTOP-USTKSIZE) /* end of new stack in sysexec */
+#define TSTKSIZ 100
+
+/*
+ * known x86 segments (in GDT) and their selectors
+ */
+#define NULLSEG 0 /* null segment */
+#define KDSEG 1 /* kernel data/stack */
+#define KESEG 2 /* kernel executable */
+#define UDSEG 3 /* user data/stack */
+#define UESEG 4 /* user executable */
+#define TSSSEG 5 /* task segment */
+#define APMCSEG 6 /* APM code segment */
+#define APMCSEG16 7 /* APM 16-bit code segment */
+#define APMDSEG 8 /* APM data segment */
+#define PROCSEG0 11 /* per process descriptor0 */
+#define NPROCSEG 3 /* number of per process descriptors */
+#define NGDT 13 /* number of GDT entries required */
+/* #define APM40SEG 8 /* APM segment 0x40 */
+
+#define SELGDT (0<<2) /* selector is in gdt */
+#define SELLDT (1<<2) /* selector is in ldt */
+
+#define SELECTOR(i, t, p) (((i)<<3) | (t) | (p))
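+/* e.g. SELECTOR(2, SELGDT, 0) == 0x10: GDT entry 2, requestor privilege level 0 */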
+
+#define NULLSEL SELECTOR(NULLSEG, SELGDT, 0)
+/* these are replaced by XEN entries */
+#ifdef NOPE // XXX investigate more
+#define KDSEL SELECTOR(KDSEG, SELGDT, 0)
+#define KESEL SELECTOR(KESEG, SELGDT, 0)
+#define UESEL SELECTOR(UESEG, SELGDT, 3)
+#define UDSEL SELECTOR(UDSEG, SELGDT, 3)
+/* comment out to make sure unused ... */
+
+#define TSSSEL SELECTOR(TSSSEG, SELGDT, 0)
+#define APMCSEL SELECTOR(APMCSEG, SELGDT, 0)
+#define APMCSEL16 SELECTOR(APMCSEG16, SELGDT, 0)
+#define APMDSEL SELECTOR(APMDSEG, SELGDT, 0)
+/* #define APM40SEL SELECTOR(APM40SEG, SELGDT, 0) */
+#else
+/* use the selectors that xen gives us */
+#define KESEL FLAT_KERNEL_CS
+#define KDSEL FLAT_KERNEL_DS
+#define UESEL FLAT_USER_CS
+#define UDSEL FLAT_USER_DS
+#endif
+
+/*
+ * fields in segment descriptors
+ */
+#define SEGDATA (0x10<<8) /* data/stack segment */
+#define SEGEXEC (0x18<<8) /* executable segment */
+#define SEGTSS (0x9<<8) /* TSS segment */
+#define SEGCG (0x0C<<8) /* call gate */
+#define SEGIG (0x0E<<8) /* interrupt gate */
+#define SEGTG (0x0F<<8) /* trap gate */
+#define SEGTYPE (0x1F<<8)
+
+#define SEGP (1<<15) /* segment present */
+#define SEGPL(x) ((x)<<13) /* priority level */
+#define SEGB (1<<22) /* granularity 1==4k (for expand-down) */
+#define SEGG (1<<23) /* granularity 1==4k (for other) */
+#define SEGE (1<<10) /* expand down */
+#define SEGW (1<<9) /* writable (for data/stack) */
+#define SEGR (1<<9) /* readable (for code) */
+#define SEGD (1<<22) /* default 1==32bit (for code) */
+
+/*
+ * virtual MMU
+ */
+#define PTEMAPMEM (1024*1024)
+#define PTEPERTAB (PTEMAPMEM/BY2PG)
+#define SEGMAPSIZE 1984
+#define SSEGMAPSIZE 16
+#define PPN(x) ((x)&~(BY2PG-1))
+#define PGOFF(x) ((x)&(BY2PG-1))
+
+/*
+ * physical MMU
+ */
+#define PTEVALID (1<<0)
+#define PTEWT (1<<3)
+#define PTEUNCACHED (1<<4)
+#define PTEWRITE (1<<1)
+#define PTERONLY (0<<1)
+#define PTEKERNEL (0<<2)
+#define PTEUSER (1<<2)
+#define PTESIZE (1<<7)
+#define PTEGLOBAL (1<<8)
+
+/*
+ * Macros for calculating offsets within the page directory base
+ * and page tables.
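+ * In PAE mode entries are 8 bytes and are addressed here as pairs
+ * of ulongs, so the indices come back doubled (hence the 0x03FE
+ * and 0x6 masks).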
+ */
+#define PAX(va) (paemode? ((ulong)(va)>>29) & 0x6 : 0)
+#define PDX(va) (paemode? (((ulong)(va))>>20) & 0x03FE : (((ulong)(va))>>22) & 0x03FF)
+#define PTX(va) (paemode? (((ulong)(va))>>11) & 0x03FE : (((ulong)(va))>>12) & 0x03FF)
+#define PDB(pdb,va) (paemode? KADDR(MAPPN((pdb)[((ulong)(va)>>29) & 0x6])) : pdb)
+
+#define getpgcolor(a) 0
--- /dev/null
+++ b/sys/src/9/xen/mkfile
@@ -1,0 +1,188 @@
+CONF=xenpcf
+CONFLIST=xenpcf
+
+objtype=386
+</$objtype/mkfile
+p=9
+
+KTZERO=0x80010000
+KZERO=0x80000000
+KPZERO=0x10000
+PAE=yes
+
+DEVS=`{rc ../port/mkdevlist $CONF}
+
+PORT=\
+ alarm.$O\
+ alloc.$O\
+ allocb.$O\
+ auth.$O\
+ cache.$O\
+ chan.$O\
+ dev.$O\
+ edf.$O\
+ fault.$O\
+ page.$O\
+ parse.$O\
+ pgrp.$O\
+ portclock.$O\
+ print.$O\
+ proc.$O\
+ qio.$O\
+ qlock.$O\
+ rebootcmd.$O\
+ segment.$O\
+ swap.$O\
+ sysfile.$O\
+ sysproc.$O\
+ taslock.$O\
+ tod.$O\
+ xalloc.$O\
+
+XEN=\
+ xengrant.$O\
+ xentimer.$O\
+ xensystem.$O\
+
+SCHED=`{ls -p xen-public/sched*.h >[2]/dev/null}
+ARCH=`{test -d xen-public/arch-x86 && echo arch-x86/xen-x86_32.h arch-x86/xen.h || echo arch-x86_32.h }
+
+XENHEADERS=\
+ $ARCH\
+ xen.h\
+ event_channel.h\
+ grant_table.h\
+ memory.h\
+ physdev.h\
+ $SCHED\
+ io/ring.h\
+ io/blkif.h\
+ io/console.h\
+ io/netif.h\
+ io/xenbus.h\
+ io/xs_wire.h\
+
+OBJ=\
+ l.$O\
+ plan9l.$O\
+ xen.$O\
+ main.$O\
+ mmu.$O\
+ random.$O\
+ rdb.$O\
+ trap.$O\
+ $CONF.root.$O\
+ $CONF.rootc.$O\
+ $DEVS\
+ $PORT\
+ $XEN\
+
+LIB=\
+ /$objtype/lib/libmemlayer.a\
+ /$objtype/lib/libmemdraw.a\
+ /$objtype/lib/libdraw.a\
+ /$objtype/lib/libip.a\
+ /$objtype/lib/libsec.a\
+ /$objtype/lib/libmp.a\
+ /$objtype/lib/libc.a\
+
+ETHER=`{echo devether.c ether*.c | sed 's/\.c/.'$O'/g'}
+VGA=`{echo devvga.c screen.c vga*.c | sed 's/\.c/.'$O'/g'}
+SDEV=`{echo devsd.c sd*.c | sed 's/\.c/.'$O'/g'}
+
+PAE=`{echo $PAE | sed 's/yes/yes[extended-cr3]/'}
+XENELF='LOADER=generic,XEN_VER=xen-3.0,ELF_PADDR_OFFSET=0,VIRT_BASE='$KZERO',VIRT_ENTRY='$KTZERO',PAE='$PAE
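+# the __xen_guest ELF section tells the Xen domain builder how to load
+# and enter the kernel (virtual base, entry point, PAE mode)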
+
+#$p$CONF: $CONF.c $OBJ $LIB xenbin
+# $CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c
+# $LD -o $target.tmp -T$KTZERO -l $OBJ $CONF.$O $LIB
+# ./xenbin <$target.tmp >$target
+# rm $target.tmp
+# size $target
+
+$p$CONF: $CONF.c $OBJ $LIB xenelf
+ $CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c
+ $LD -o $target.elf -H5 -T$KTZERO -P$KPZERO -l $OBJ $CONF.$O $LIB
+ ./xenelf $target.elf $target __xen_guest ''$XENELF''
+ size $target
+
+$p$CONF.gz: $p$CONF
+ #strip -o /fd/1 $p$CONF | gzip -9 > $p$CONF.gz
+ gzip -9 $p$CONF > $p$CONF.gz
+
+install:V: $p$CONF $p$CONF.gz
+ cp $p$CONF $p$CONF.gz /$objtype/
+ # import lookout / /n/lookout && cp $p$CONF $p$CONF.gz /n/lookout/$objtype/
+
+# copies generated by the rule below
+PCHEADERS=uncached.h etherif.h ethermii.h mp.h io.h
+
+REPCH=`{echo $PCHEADERS | sed 's/\.h//g; s/ /|/g'}
+^($REPCH)\.h:R: '../pc/\1.h'
+ cp $prereq .
+
+REPCC=`{../port/mkfilelist ../pc}
+^($REPCC)\.$O:R: '../pc/\1.c'
+ $CC $CFLAGS -I. -. ../pc/$stem1.c
+
+<../boot/bootmkfile
+<../port/portmkfile
+<|../port/mkbootrules $CONF
+
+ptclbsum386.$O: ../pc/ptclbsum386.s
+ $AS $AFLAGS ../pc/ptclbsum386.s
+
+# we inherited these; revisit.
+$ETHER: etherif.h ../port/netif.h
+$SDEV: ../port/sd.h
+main.$O: init.h reboot.h
+trap.$O: /sys/include/tos.h
+
+%.$O: /$objtype/include/u.h ../port/lib.h mem.h dat.h fns.h io.h ../port/error.h ../port/portdat.h ../port/portfns.h xendat.h xendefs.h
+
+xendefs.h: xendat.h
+ grep '^#define[ ]+FLAT_' xendat.h >$target
+
+xendat.h:
+ { echo '#define __i386__ __i386__'; \
+ echo '#define __XEN_INTERFACE_VERSION__ 0x00030201'; \
+ echo '#define XEN_GUEST_HANDLE_00030205(type) type *'; \
+ cat xen-public/^($XENHEADERS) } | \
+ ./cppx > $target
+
+init.h: ../port/initcode.c ../pc/init9.c
+ $CC ../port/initcode.c
+ $CC ../pc/init9.c
+ $LD -l -R1 -o init.out init9.$O initcode.$O /386/lib/libc.a
+ {echo 'uchar initcode[]={'
+ strip -o /fd/1 init.out | xd -1x |
+ sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+ echo '};'} > init.h
+
+reboot.h: ../pc/rebootcode.s
+ $AS ../pc/rebootcode.s
+ $LD -l -s -T0x1000 -R4 -o reboot.out rebootcode.$O
+ {echo 'uchar rebootcode[]={'
+ xd -1x reboot.out |
+ sed -e '1,2d' -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+ echo '};'} > reboot.h
+
+acid:V:
+ $CC -a -w main.c>acid
+
+dpart: dpart.$O
+ $LD -o dpart dpart.$O
+
+xenstore: xenstore.$O
+ $LD -o xenstore xenstore.$O
+
+# XXX this is wrong if we're cross-compiling
+xenbin: xenbin.$O
+ $LD -o xenbin xenbin.$O
+xenelf: xenelf.$O
+ $LD -o xenelf xenelf.$O
+
+%.clean:V:
+ rm -f $stem.c [9bz]$stem [9bz]$stem.gz 9$stem.elf boot$stem.* reboot.h init.h xendat.h xendefs.h $PCHEADERS dpart xenbin xenelf xenstore
+
+
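+	/* map the hypervisor's shared info page at a fixed kernel virtual address */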
--- /dev/null
+++ b/sys/src/9/xen/mmu.c
@@ -1,0 +1,595 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+int paemode;
+uvlong *xenpdpt; /* this needs to go in Mach for multiprocessor guest */
+
+#define LOG(a)
+#define PUTMMULOG(a)
+#define MFN(pa) (patomfn[(pa)>>PGSHIFT])
+#define MAPPN(x) (paemode? matopfn[*(uvlong*)(&x)>>PGSHIFT]<<PGSHIFT : matopfn[(x)>>PGSHIFT]<<PGSHIFT)
+
+#define DATASEGM(p) { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
+#define EXECSEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
+#define TSSSEGM(b,p) { ((b)<<16)|sizeof(Tss),\
+ ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }
+
+Segdesc gdt[NGDT] =
+{
+[NULLSEG] { 0, 0}, /* null descriptor */
+[KDSEG] DATASEGM(0), /* kernel data/stack */
+[KESEG] EXECSEGM(0), /* kernel code */
+[UDSEG] DATASEGM(3), /* user data/stack */
+[UESEG] EXECSEGM(3), /* user code */
+[TSSSEG] TSSSEGM(0,0), /* tss segment */
+};
+
+/* note: pdb must already be pinned */
+static void
+taskswitch(Page *pdb, ulong stack)
+{
+ Tss *tss;
+
+ tss = m->tss;
+ tss->ss0 = KDSEL;
+ tss->esp0 = stack;
+ tss->ss1 = KDSEL;
+ tss->esp1 = stack;
+ tss->ss2 = KDSEL;
+ tss->esp2 = stack;
+ //tss->cr3 = pdb;
+ HYPERVISOR_stack_switch(KDSEL, stack);
+ mmuflushtlb(pdb);
+}
+
+void
+mmuflushtlb(Page *pdb)
+{
+ int s, i;
+
+ if(!paemode){
+ if(pdb)
+ xenptswitch(pdb->pa);
+ else
+ xenptswitch(PADDR(m->pdb));
+ }else{
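+		/*
+		 * PAE: point the first three PDPT entries at the process's
+		 * chained directory pages, or back at the prototype m->pdb
+		 */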
+ if(pdb){
+ s = splhi();
+ for(i = 0; i < 3; i++){
+ xenupdate((ulong*)&xenpdpt[i], pdb->pa | PTEVALID);
+ pdb = pdb->next;
+ }
+ splx(s);
+ }else{
+ s = splhi();
+ for(i = 0; i < 3; i++)
+ xenupdatema((ulong*)&xenpdpt[i], ((uvlong*)m->pdb)[i]);
+ splx(s);
+ }
+ xentlbflush();
+ }
+}
+
+/*
+ * On processors that support it, we set the PTEGLOBAL bit in
+ * page table and page directory entries that map kernel memory.
+ * Doing this tells the processor not to bother flushing them
+ * from the TLB when doing the TLB flush associated with a
+ * context switch (write to CR3). Since kernel memory mappings
+ * are never removed, this is safe. (If we ever remove kernel memory
+ * mappings, we can do a full flush by turning off the PGE bit in CR4,
+ * writing to CR3, and then turning the PGE bit back on.)
+ *
+ * See also mmukmap below.
+ *
+ * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
+ */
+static void
+memglobal(void)
+{
+ int i, j;
+ ulong *pde, *pte;
+
+ /* only need to do this once, on bootstrap processor */
+ if(m->machno != 0)
+ return;
+
+ if(!m->havepge)
+ return;
+
+ pde = m->pdb;
+ for(i=512; i<1024; i++){ /* 512: start at entry for virtual 0x80000000 */
+ if(pde[i] & PTEVALID){
+ pde[i] |= PTEGLOBAL;
+ if(!(pde[i] & PTESIZE)){
+ pte = KADDR(pde[i]&~(BY2PG-1));
+ for(j=0; j<1024; j++)
+ if(pte[j] & PTEVALID)
+ pte[j] |= PTEGLOBAL;
+ }
+ }
+ }
+}
+
+ulong
+mmumapframe(ulong va, ulong mfn)
+{
+ ulong *pte, pdbx;
+ uvlong ma;
+
+ /*
+ * map machine frame number to a virtual address.
+ * When called the pagedir and page table exist, we just
+ * need to fill in a page table entry.
+ */
+ ma = ((uvlong)mfn<<PGSHIFT) | PTEVALID|PTEWRITE;
+ pdbx = PDX(va);
+ pte = KADDR(MAPPN(PDB(m->pdb,va)[pdbx]));
+ xenupdatema(&pte[PTX(va)], ma);
+ return va;
+}
+
+void
+mmumapcpu0(void)
+{
+ ulong *pdb, *pte, va, pa, pdbx;
+
+ if(strstr(xenstart->magic, "x86_32p"))
+ paemode = 1;
+ hypervisor_virt_start = paemode ? 0xF5800000 : 0xFC000000;
+ patomfn = (ulong*)xenstart->mfn_list;
+ matopfn = (ulong*)hypervisor_virt_start;
+	/* Xen bug? can't touch top entry in PDPT */
+ if(paemode)
+ hypervisor_virt_start = 0xC0000000;
+
+ /*
+ * map CPU0MACH at MACHADDR.
+ * When called the pagedir and page table exist, we just
+ * need to fill in a page table entry.
+ */
+ pdb = (ulong*)xenstart->pt_base;
+ va = MACHADDR;
+ pa = PADDR(CPU0MACH) | PTEVALID|PTEWRITE;
+ pdbx = PDX(va);
+ pdb = PDB(pdb, va);
+ pte = KADDR(MAPPN(pdb[pdbx]));
+ xenupdate(&pte[PTX(va)], pa);
+}
+
+void
+mmuinit(void)
+{
+//XXX ulong x;
+//XXX ushort ptr[3];
+ ulong *pte, npgs, pa;
+ extern int rtsr(void);
+
+ if(paemode){
+ int i;
+ xenpdpt = (uvlong*)m->pdb;
+ m->pdb = xspanalloc(32, 32, 0);
+ /* clear "reserved" bits in initial page directory pointers -- Xen bug? */
+ for(i = 0; i < 4; i++)
+ ((uvlong*)m->pdb)[i] = xenpdpt[i] & ~0x1E6LL;
+ }
+
+ /*
+ * So far only memory up to xentop is mapped, map the rest.
+	 * We can't use large pages because our contiguous PA space
+ * is not necessarily contiguous in MA.
+ */
+ npgs = conf.mem[0].npage;
+ for(pa=conf.mem[0].base; npgs; npgs--, pa+=BY2PG) {
+ pte = mmuwalk(m->pdb, (ulong)KADDR(pa), 2, 1);
+ if(!pte)
+ panic("mmuinit");
+ xenupdate(pte, pa|PTEVALID|PTEWRITE);
+ }
+
+ memglobal();
+
+ m->tss = malloc(sizeof(Tss));
+ memset(m->tss, 0, sizeof(Tss));
+ m->tss->iomap = 0xDFFF<<16;
+
+ /*
+ * We used to keep the GDT in the Mach structure, but it
+ * turns out that that slows down access to the rest of the
+ * page. Since the Mach structure is accessed quite often,
+ * it pays off anywhere from a factor of 1.25 to 2 on real
+ * hardware to separate them (the AMDs are more sensitive
+ * than Intels in this regard). Under VMware it pays off
+ * a factor of about 10 to 100.
+ */
+
+#ifdef we_dont_set_gdt_or_lidt
+ memmove(m->gdt, gdt, sizeof gdt);
+ x = (ulong)m->tss;
+ m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
+ m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
+
+ ptr[0] = sizeof(gdt)-1;
+ x = (ulong)m->gdt;
+ ptr[1] = x & 0xFFFF;
+ ptr[2] = (x>>16) & 0xFFFF;
+ lgdt(ptr);
+
+ ptr[0] = sizeof(Segdesc)*256-1;
+ x = IDTADDR;
+ ptr[1] = x & 0xFFFF;
+ ptr[2] = (x>>16) & 0xFFFF;
+ lidt(ptr);
+#endif
+
+#ifdef we_may_eventually_want_this
+ /* make kernel text unwritable */
+ for(x = KTZERO; x < (ulong)etext; x += BY2PG){
+ p = mmuwalk(m->pdb, x, 2, 0);
+ if(p == nil)
+ panic("mmuinit");
+ *p &= ~PTEWRITE;
+ }
+#endif
+
+ taskswitch(0, (ulong)m + BY2PG);
+#ifdef we_dont_do_this
+ ltr(TSSSEL);
+#endif
+}
+
+void
+flushmmu(void)
+{
+ int s;
+
+ s = splhi();
+ up->newtlb = 1;
+ mmuswitch(up);
+ splx(s);
+}
+
+static ulong*
+mmupdb(Page *pg, ulong va)
+{
+ int i;
+
+ for(i = PAX(va); i > 0; i -= 2)
+ pg = pg->next;
+ return (ulong*)pg->va;
+}
+
+/* this can be called with an active pdb, so use Xen calls to zero it out */
+static void
+mmuptefree(Proc* proc)
+{
+ ulong *pdb, va;
+ Page **last, *page;
+
+ if(proc->mmupdb && proc->mmuused){
+ last = &proc->mmuused;
+ for(page = *last; page; page = page->next){
+ /* this is no longer a pte page so make it readwrite */
+ va = page->daddr;
+ pdb = mmupdb(proc->mmupdb, va);
+ xenupdatema(&pdb[PDX(va)], 0);
+ xenptunpin(page->va);
+ last = &page->next;
+ }
+ *last = proc->mmufree;
+ proc->mmufree = proc->mmuused;
+ proc->mmuused = 0;
+ }
+}
+
+void
+mmuswitch(Proc* proc)
+{
+ //ulong *pdb;
+
+ if(proc->newtlb){
+ mmuptefree(proc);
+ proc->newtlb = 0;
+ }
+
+ if(proc->mmupdb){
+ //XXX doesn't work for some reason, but it's not needed for uniprocessor
+ //pdb = (ulong*)proc->mmupdb->va;
+ //xenupdate(&pdb[PDX(MACHADDR)], m->pdb[PDX(MACHADDR)]);
+ taskswitch(proc->mmupdb, (ulong)(proc->kstack+KSTACK));
+ }
+ else
+ taskswitch(0, (ulong)(proc->kstack+KSTACK));
+}
+
+void
+mmurelease(Proc* proc)
+{
+ Page *page, *next;
+
+ /*
+ * Release any pages allocated for a page directory base or page-tables
+ * for this process:
+ * switch to the prototype pdb for this processor (m->pdb);
+ * call mmuptefree() to place all pages used for page-tables (proc->mmuused)
+ * onto the process' free list (proc->mmufree). This has the side-effect of
+ * cleaning any user entries in the pdb (proc->mmupdb);
+ * if there's a pdb put it in the cache of pre-initialised pdb's
+ * for this processor (m->pdbpool) or on the process' free list;
+ * finally, place any pages freed back into the free pool (palloc).
+ * This routine is only called from sched() with palloc locked.
+ */
+ taskswitch(0, (ulong)m + BY2PG);
+ mmuptefree(proc);
+
+ if((page = proc->mmupdb) != 0){
+ proc->mmupdb = 0;
+ while(page){
+ next = page->next;
+			/* it's not a page table anymore; mark it rw */
+ xenptunpin(page->va);
+ if(paemode || m->pdbcnt > 10){
+ page->next = proc->mmufree;
+ proc->mmufree = page;
+ }
+ else{
+ page->next = m->pdbpool;
+ m->pdbpool = page;
+ m->pdbcnt++;
+ }
+ page = next;
+ }
+ }
+
+ for(page = proc->mmufree; page; page = next){
+ next = page->next;
+ if(--page->ref)
+ panic("mmurelease: page->ref %d\n", page->ref);
+ pagechainhead(page);
+ }
+ if(proc->mmufree && palloc.r.p)
+ wakeup(&palloc.r);
+ proc->mmufree = 0;
+}
+
+static Page*
+mmupdballoc(ulong va, void *mpdb)
+{
+ int s;
+ Page *page;
+ Page *badpages, *pg;
+
+ s = splhi();
+ /*
+ * All page tables must be read-only. We will mark them
+ * readwrite later when we free them and they are no
+ * longer used as page tables.
+ */
+ if(m->pdbpool == 0){
+ spllo();
+ badpages = 0;
+ for (;;) {
+ page = newpage(0, 0, 0);
+ page->va = VA(kmap(page));
+ if(mpdb)
+ memmove((void*)page->va, mpdb, BY2PG);
+ else
+ memset((void*)page->va, 0, BY2PG);
+ if (xenpgdpin(page->va))
+ break;
+ /*
+ * XXX Plan 9 is a bit lax about putting pages on the free list when they are
+ * still mapped (r/w) by some process's page table. From Plan 9's point
+ * of view this is safe because any such process will have up->newtlb set,
+ * so the mapping will be cleared before the process is dispatched. But the Xen
+ * hypervisor has no way of knowing this, so it refuses to pin the page for use
+ * as a pagetable.
+ */
+ if(0) print("bad pgdpin %lux va %lux copy %lux %s\n", MFN(PADDR(page->va)), va, (ulong)mpdb, up? up->text: "");
+ page->next = badpages;
+ badpages = page;
+ }
+ while (badpages != 0) {
+ pg = badpages;
+ badpages = badpages->next;
+ putpage(pg);
+ }
+ }
+ else{
+ page = m->pdbpool;
+ m->pdbpool = page->next;
+ m->pdbcnt--;
+ if (!xenpgdpin(page->va))
+ panic("xenpgdpin");
+ }
+ splx(s);
+
+ page->next = 0;
+ return page;
+}
+
+void
+checkmmu(ulong va, ulong pa)
+{
+ ulong *pdb, *pte;
+ int pdbx;
+
+ if(up->mmupdb == 0)
+ return;
+
+ pdb = mmupdb(up->mmupdb, va);
+ pdbx = PDX(va);
+ if(MAPPN(pdb[pdbx]) == 0){
+ /* okay to be empty - will fault and get filled */
+ return;
+ }
+
+ pte = KADDR(MAPPN(pdb[pdbx]));
+ if(MAPPN(pte[PTX(va)]) != pa){
+ if(!paemode)
+ print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%08lux (0x%08lux)\n",
+ up->pid, up->text,
+ va, pa, pte[PTX(va)], MAPPN(pte[PTX(va)]));
+ else
+ print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%16llux (0x%08lux)\n",
+ up->pid, up->text,
+ va, pa, *(uvlong*)&pte[PTX(va)], MAPPN(pte[PTX(va)]));
+ }
+}
+
+void
+putmmu(ulong va, ulong pa, Page*)
+{
+ int pdbx;
+ Page *page;
+ Page *badpages, *pg;
+ ulong *pdb, *pte;
+ int i, s;
+
+ PUTMMULOG(dprint("putmmu va %lux pa %lux\n", va, pa);)
+ if(up->mmupdb == 0){
+ if(!paemode)
+ up->mmupdb = mmupdballoc(va, m->pdb);
+ else {
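+			/*
+			 * PAE: allocate a directory page for each PDPT slot,
+			 * copying the prototype where one exists; built in
+			 * reverse so the list ends up in ascending order
+			 */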
+ page = 0;
+ for(i = 4; i >= 0; i -= 2){
+ if(m->pdb[i])
+ pg = mmupdballoc(va, KADDR(MAPPN(m->pdb[i])));
+ else
+ pg = mmupdballoc(va, 0);
+ pg->next = page;
+ page = pg;
+ }
+ up->mmupdb = page;
+ }
+ }
+ pdb = mmupdb(up->mmupdb, va);
+ pdbx = PDX(va);
+
+ if(PPN(pdb[pdbx]) == 0){
+ PUTMMULOG(dprint("new pt page for index %d pdb %lux\n", pdbx, (ulong)pdb);)
+ /* mark page as readonly before using as a page table */
+ if(up->mmufree == 0){
+ badpages = 0;
+ for (;;) {
+ page = newpage(1, 0, 0);
+ page->va = VA(kmap(page));
+ if (xenptpin(page->va))
+ break;
+ if(0) print("bad pin %lux va %lux %s\n", MFN(PADDR(page->va)), va, up->text);
+ page->next = badpages;
+ badpages = page;
+ }
+ while (badpages != 0) {
+ pg = badpages;
+ badpages = badpages->next;
+ putpage(pg);
+ }
+ }
+ else {
+ page = up->mmufree;
+ up->mmufree = page->next;
+ memset((void*)page->va, 0, BY2PG);
+ if (!xenptpin(page->va))
+ panic("xenptpin");
+ }
+
+ xenupdate(&pdb[pdbx], page->pa|PTEVALID|PTEUSER|PTEWRITE);
+
+ page->daddr = va;
+ page->next = up->mmuused;
+ up->mmuused = page;
+ }
+
+ pte = KADDR(MAPPN(pdb[pdbx]));
+ PUTMMULOG(dprint("pte %lux index %lud old %lux new %lux mfn %lux\n", (ulong)pte, PTX(va), pte[PTX(va)], pa|PTEUSER, MFN(pa));)
+ xenupdate(&pte[PTX(va)], pa|PTEUSER);
+
+ s = splhi();
+ //XXX doesn't work for some reason, but it's not needed for uniprocessor
+ //xenupdate(&pdb[PDX(MACHADDR)], m->pdb[PDX(MACHADDR)]);
+ mmuflushtlb(up->mmupdb);
+ splx(s);
+}
+
+ulong*
+mmuwalk(ulong* pdb, ulong va, int level, int create)
+{
+ ulong pa, va2, *table;
+
+ /*
+ * Walk the page-table pointed to by pdb and return a pointer
+ * to the entry for virtual address va at the requested level.
+ * If the entry is invalid and create isn't requested then bail
+ * out early. Otherwise, for the 2nd level walk, allocate a new
+ * page-table page and register it in the 1st level.
+ */
+ if(paemode){
+ pdb = &pdb[PAX(va)];
+ if(!(*pdb & PTEVALID)){
+ if(create == 0)
+ return 0;
+ panic("mmuwalk: missing pgdir ptr for va=%lux\n", va);
+ }
+ pdb = KADDR(MAPPN(*pdb));
+ }
+ table = &pdb[PDX(va)];
+ if(!(*table & PTEVALID) && create == 0)
+ return 0;
+
+ switch(level){
+
+ default:
+ return 0;
+
+ case 1:
+ return table;
+
+ case 2:
+ if(*table & PTESIZE)
+ panic("mmuwalk2: va %luX entry %luX\n", va, *table);
+ if(!(*table & PTEVALID)){
+ va2 = (ulong)xspanalloc(BY2PG, BY2PG, 0);
+ pa = PADDR(va2);
+ xenptpin(va2);
+ xenupdate(table, pa|PTEWRITE|PTEVALID);
+ }
+ table = KADDR(MAPPN(*table));
+
+ return &table[PTX(va)];
+ }
+}
+
+int
+mmukmapsync(ulong va)
+{
+ USED(va);
+ return 0;
+}
+
+/*
+ * More debugging.
+ */
+void
+countpagerefs(ulong *ref, int print)
+{
+ USED(ref);
+ USED(print);
+}
+
+/*
+ * Return the number of bytes that can be accessed via KADDR(pa).
+ * If pa is not a valid argument to KADDR, return 0.
+ */
+ulong
+cankaddr(ulong pa)
+{
+ if(pa >= -KZERO)
+ return 0;
+ return -KZERO - pa;
+}
--- /dev/null
+++ b/sys/src/9/xen/plan9l.s
@@ -1,0 +1,53 @@
+#include "xendefs.h"
+#include "mem.h"
+
+/*
+ * This must match io.h.
+ */
+#define VectorSYSCALL 0x40
+
+/*
+ * Used to get to the first process:
+ * set up an interrupt return frame and IRET to user level.
+ */
+TEXT touser(SB), $0
+ PUSHL $(UDSEL) /* old ss */
+ MOVL sp+0(FP), AX /* old sp */
+ PUSHL AX
+ MOVL $0x200, AX /* interrupt enable flag */
+ PUSHL AX /* old flags */
+ PUSHL $(UESEL) /* old cs */
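+	/* entry point skips the 32-byte a.out header at UTZERO */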
+ PUSHL $(UTZERO+32) /* old pc */
+ MOVL $(UDSEL), AX
+ MOVW AX, DS
+ MOVW AX, ES
+ MOVW AX, GS
+ MOVW AX, FS
+ IRETL
+
+/*
+ * This is merely _strayintr from l.s optimised to vector
+ * to syscall() without going through trap().
+ */
+TEXT _syscallintr(SB), $0
+ PUSHL $VectorSYSCALL /* trap type */
+
+ PUSHL DS
+ PUSHL ES
+ PUSHL FS
+ PUSHL GS
+ PUSHAL
+ MOVL $(KDSEL), AX
+ MOVW AX, DS
+ MOVW AX, ES
+ PUSHL SP
+ CALL syscall(SB)
+
+ POPL AX
+ POPAL
+ POPL GS
+ POPL FS
+ POPL ES
+ POPL DS
+ ADDL $8, SP /* pop error code and trap type */
+ IRETL
--- /dev/null
+++ b/sys/src/9/xen/screen.h
@@ -1,0 +1,186 @@
+typedef struct Cursor Cursor;
+typedef struct Cursorinfo Cursorinfo;
+struct Cursorinfo {
+ Cursor;
+ Lock;
+};
+
+/* devmouse.c */
+extern void mousetrack(int, int, int, int);
+extern void absmousetrack(int, int, int, int);
+extern Point mousexy(void);
+
+extern void mouseaccelerate(int);
+extern int m3mouseputc(Queue*, int);
+extern int m5mouseputc(Queue*, int);
+extern int mouseputc(Queue*, int);
+
+extern Cursorinfo cursor;
+extern Cursor arrow;
+
+/*
+ * Generic VGA registers.
+ */
+enum {
+ MiscW = 0x03C2, /* Miscellaneous Output (W) */
+ MiscR = 0x03CC, /* Miscellaneous Output (R) */
+ Status0 = 0x03C2, /* Input status 0 (R) */
+ Status1 = 0x03DA, /* Input Status 1 (R) */
+ FeatureR = 0x03CA, /* Feature Control (R) */
+ FeatureW = 0x03DA, /* Feature Control (W) */
+
+ Seqx = 0x03C4, /* Sequencer Index, Data at Seqx+1 */
+ Crtx = 0x03D4, /* CRT Controller Index, Data at Crtx+1 */
+ Grx = 0x03CE, /* Graphics Controller Index, Data at Grx+1 */
+ Attrx = 0x03C0, /* Attribute Controller Index and Data */
+
+ PaddrW = 0x03C8, /* Palette Address Register, write */
+ Pdata = 0x03C9, /* Palette Data Register */
+ Pixmask = 0x03C6, /* Pixel Mask Register */
+ PaddrR = 0x03C7, /* Palette Address Register, read */
+ Pstatus = 0x03C7, /* DAC Status (RO) */
+
+ Pcolours = 256, /* Palette */
+ Pred = 0,
+ Pgreen = 1,
+ Pblue = 2,
+
+ Pblack = 0x00,
+ Pwhite = 0xFF,
+};
+
+#define VGAMEM() 0xA0000
+#define vgai(port) inb(port)
+#define vgao(port, data) outb(port, data)
+
+extern int vgaxi(long, uchar);
+extern int vgaxo(long, uchar, uchar);
+
+/*
+ */
+typedef struct VGAdev VGAdev;
+typedef struct VGAcur VGAcur;
+typedef struct VGAscr VGAscr;
+
+struct VGAdev {
+ char* name;
+
+ void (*enable)(VGAscr*);
+ void (*disable)(VGAscr*);
+ void (*page)(VGAscr*, int);
+ void (*linear)(VGAscr*, int, int);
+ void (*drawinit)(VGAscr*);
+ int (*fill)(VGAscr*, Rectangle, ulong);
+ void (*ovlctl)(VGAscr*, Chan*, void*, int);
+ int (*ovlwrite)(VGAscr*, void*, int, vlong);
+ void (*flush)(VGAscr*, Rectangle);
+};
+
+struct VGAcur {
+ char* name;
+
+ void (*enable)(VGAscr*);
+ void (*disable)(VGAscr*);
+ void (*load)(VGAscr*, Cursor*);
+ int (*move)(VGAscr*, Point);
+
+ int doespanning;
+};
+
+/*
+ */
+struct VGAscr {
+ Lock devlock;
+ VGAdev* dev;
+ Pcidev* pci;
+
+ VGAcur* cur;
+ ulong storage;
+ Cursor;
+
+ int useflush;
+
+ ulong paddr; /* frame buffer */
+ void* vaddr;
+ int apsize;
+
+ ulong io; /* device specific registers */
+ ulong *mmio;
+
+ ulong colormap[Pcolours][3];
+ int palettedepth;
+
+ Memimage* gscreen;
+ Memdata* gscreendata;
+ Memsubfont* memdefont;
+
+ int (*fill)(VGAscr*, Rectangle, ulong);
+ int (*scroll)(VGAscr*, Rectangle, Rectangle);
+ void (*blank)(VGAscr*, int);
+ ulong id; /* internal identifier for driver use */
+ int isblank;
+ int overlayinit;
+ int softscreen;
+};
+
+extern VGAscr vgascreen[];
+
+enum {
+ Backgnd = 0, /* black */
+};
+
+/* mouse.c */
+extern void mousectl(Cmdbuf*);
+extern void mouseresize(void);
+extern void mouseredraw(void);
+
+/* screen.c */
+extern int hwaccel; /* use hw acceleration; default on */
+extern int hwblank; /* use hw blanking; default on */
+extern int panning; /* use virtual screen panning; default off */
+extern void addvgaseg(char*, ulong, ulong);
+extern uchar* attachscreen(Rectangle*, ulong*, int*, int*, int*);
+extern void flushmemscreen(Rectangle);
+extern void cursoron(void);
+extern void cursoroff(void);
+extern void setcursor(Cursor*);
+extern int screensize(int, int, int, ulong);
+extern int screenaperture(int, int);
+extern Rectangle physgscreenr; /* actual monitor size */
+extern void blankscreen(int);
+
+extern VGAcur swcursor;
+extern void swcursorinit(void);
+extern void swcursorhide(void);
+extern void swcursoravoid(Rectangle);
+extern void swcursorunhide(void);
+
+/* devdraw.c */
+extern void deletescreenimage(void);
+extern void resetscreenimage(void);
+extern int drawhasclients(void);
+extern ulong blanktime;
+extern void setscreenimageclipr(Rectangle);
+extern void drawflush(void);
+extern int drawidletime(void);
+extern QLock drawlock;
+
+/* vga.c */
+extern void vgascreenwin(VGAscr*);
+extern void vgaimageinit(ulong);
+extern void vgalinearpci(VGAscr*);
+extern void vgalinearaddr(VGAscr*, ulong, int);
+
+extern void drawblankscreen(int);
+extern void vgablank(VGAscr*, int);
+
+extern Lock vgascreenlock;
+
+#define ishwimage(i) (vgascreen[0].gscreendata && (i)->data->bdata == vgascreen[0].gscreendata->bdata)
+
+/* swcursor.c */
+void swcursorhide(void);
+void swcursoravoid(Rectangle);
+void swcursordraw(Point);
+void swcursorload(Cursor *);
+void swcursorinit(void);
--- /dev/null
+++ b/sys/src/9/xen/sdxen.c
@@ -1,0 +1,356 @@
+/*
+ * Xen block storage device frontend
+ *
+ * The present implementation follows the principle of
+ * "what's the simplest thing that could possibly work?".
+ * We can think about performance later.
+ * We can think about dynamically attaching and removing devices later.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "../port/error.h"
+
+#include "../port/sd.h"
+
+#define LOG(a)
+
+/*
+ * conversions to machine page numbers, pages and addresses
+ */
+#define MFN(pa) (patomfn[(pa)>>PGSHIFT])
+#define MFNPG(pa) (MFN(pa)<<PGSHIFT)
+#define PA2MA(pa) (MFNPG(pa) | PGOFF(pa))
+#define VA2MA(va) PA2MA(PADDR(va))
+#define VA2MFN(va) MFN(PADDR(va))
+
+enum {
+ Ndevs = 4,
+ MajorDevSD = 0x800,
+ MajorDevHDA = 0x300,
+ MajorDevHDC = 0x1600,
+ MajorDevXVD = 0xCA00,
+};
+
+extern SDifc sdxenifc;
+
+typedef struct Ctlr Ctlr;
+
+struct Ctlr {
+ int online;
+ ulong secsize;
+ ulong sectors;
+ int backend;
+ int devid;
+ int evtchn;
+ blkif_front_ring_t ring;
+ int ringref;
+ Lock ringlock;
+ char *frame;
+ QLock iolock;
+ int iodone;
+ Rendez wiodone;
+};
+
+static int
+ringinit(Ctlr *ctlr, char *a)
+{
+ blkif_sring_t *sr;
+
+ sr = (blkif_sring_t*)a;
+ memset(sr, 0, BY2PG);
+ SHARED_RING_INIT(sr);
+ FRONT_RING_INIT(&ctlr->ring, sr, BY2PG);
+ ctlr->ringref = shareframe(ctlr->backend, sr, 1);
+ return BY2PG;
+}
+
+static int
+vbdsend(Ctlr *ctlr, int write, int ref, int nb, uvlong bno)
+{
+ blkif_request_t *req;
+ int i, notify;
+
+ ilock(&ctlr->ringlock); // XXX conservative
+ i = ctlr->ring.req_prod_pvt;
+ req = RING_GET_REQUEST(&ctlr->ring, i); // XXX overflow?
+
+ req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
+ req->nr_segments = 1;
+ req->handle = ctlr->devid;
+ req->id = 1;
+ req->sector_number = bno;
+ req->seg[0].gref = ref;
+ req->seg[0].first_sect = 0;
+ req->seg[0].last_sect = nb-1;
+
+ ctlr->ring.req_prod_pvt = i+1;
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ctlr->ring, notify);
+ iunlock(&ctlr->ringlock);
+ return notify;
+}
+
+static void
+backendconnect(Ctlr *ctlr)
+{
+ char dir[64];
+ char buf[64];
+
+ sprint(dir, "device/vbd/%d/", ctlr->devid);
+ xenstore_setd(dir, "ring-ref", ctlr->ringref);
+ xenstore_setd(dir, "event-channel", ctlr->evtchn);
+ xenstore_setd(dir, "state", XenbusStateInitialised);
+ xenstore_gets(dir, "backend", buf, sizeof buf);
+ sprint(dir, "%s/", buf);
+ HYPERVISOR_yield();
+ xenstore_gets(dir, "state", buf, sizeof buf);
+ while (strtol(buf, 0, 0) != XenbusStateConnected) {
+ print("sdxen: waiting for vbd %d to connect\n", ctlr->devid);
+ tsleep(&up->sleep, return0, 0, 1000);
+ xenstore_gets(dir, "state", buf, sizeof buf);
+ }
+ xenstore_gets(dir, "sector-size", buf, sizeof buf);
+ ctlr->secsize = strtol(buf, 0, 0);
+ xenstore_gets(dir, "sectors", buf, sizeof buf);
+ ctlr->sectors = strtol(buf, 0, 0);
+	print("sdxen: backend %s secsize %lud sectors %lud\n", dir, ctlr->secsize, ctlr->sectors);
+ if (ctlr->secsize > BY2PG)
+ panic("sdxen: sector size bigger than mmu page size");
+}
+
+static void
+backendactivate(Ctlr *ctlr)
+{
+ char dir[64];
+
+ sprint(dir, "device/vbd/%d/", ctlr->devid);
+ xenstore_setd(dir, "state", XenbusStateConnected);
+}
+
+static SDev*
+xenpnp(void)
+{
+ SDev *sdev[Ndevs];
+ static char idno[Ndevs] = { '0', 'C', 'D', 'E' };
+ static char nunit[Ndevs] = { 8, 2, 2, 8 };
+ int i;
+
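+	/*
+	 * one controller set per device namespace (sd, hda, hdc, xvd);
+	 * built in reverse so the returned head links to the rest
+	 */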
+ for (i = 0; i < Ndevs; i++) {
+ sdev[i] = mallocz(sizeof(SDev), 1);
+ sdev[i]->ifc = &sdxenifc;
+ sdev[i]->idno = idno[i];
+ sdev[i]->nunit = nunit[i];
+ sdev[i]->ctlr = (Ctlr**)mallocz(sdev[i]->nunit*sizeof(Ctlr*), 1);
+ if (i > 0)
+ sdev[i]->next = sdev[i-1];
+ }
+ return sdev[Ndevs-1];
+}
+
+static int
+linuxdev(int idno, int subno)
+{
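+	/* virtual block devices are named by Linux major/minor number in xenstore */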
+ switch (idno) {
+ case '0':
+ return MajorDevSD + 16*subno;
+ case 'C':
+ return MajorDevHDA + 64*subno;
+ case 'D':
+ return MajorDevHDC + 64*subno;
+ case 'E':
+ return MajorDevXVD + 16*subno;
+ default:
+ return 0;
+ }
+}
+
+static int
+xenverify(SDunit *unit)
+{
+ Ctlr *ctlr;
+ char dir[64];
+ char buf[64];
+ int devid;
+ int npage;
+ char *p;
+
+	if (unit->subno >= unit->dev->nunit)
+ return 0;
+ devid = linuxdev(unit->dev->idno, unit->subno);
+ sprint(dir, "device/vbd/%d/", devid);
+ if (xenstore_gets(dir, "backend-id", buf, sizeof buf) <= 0)
+ return 0;
+
+ ctlr = mallocz(sizeof(Ctlr), 1);
+ ((Ctlr**)unit->dev->ctlr)[unit->subno] = ctlr;
+ ctlr->devid = devid;
+ ctlr->backend = strtol(buf, 0, 0);
+
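+	/* two pages: one for the shared ring, one as a bounce frame for unaligned i/o */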
+ npage = 2;
+ p = xspanalloc(npage<<PGSHIFT, BY2PG, 0);
+ p += ringinit(ctlr, p);
+ ctlr->frame = p;
+ ctlr->evtchn = xenchanalloc(ctlr->backend);
+ backendconnect(ctlr);
+
+ unit->inquiry[0] = 0; // XXX how do we know if it's a CD?
+ unit->inquiry[2] = 2;
+ unit->inquiry[3] = 2;
+ unit->inquiry[4] = sizeof(unit->inquiry)-4;
+ strcpy((char*)&unit->inquiry[8], "Xen block device");
+
+ return 1;
+}
+
+static int
+wiodone(void *a)
+{
+ return ((Ctlr*)a)->iodone != 0;
+}
+
+static void
+sdxenintr(Ureg *, void *a)
+{
+ Ctlr *ctlr = a;
+ blkif_response_t *rsp;
+ int i, avail;
+
+ ilock(&ctlr->ringlock); // XXX conservative
+ for (;;) {
+ RING_FINAL_CHECK_FOR_RESPONSES(&ctlr->ring, avail);
+ if (!avail)
+ break;
+ i = ctlr->ring.rsp_cons;
+ rsp = RING_GET_RESPONSE(&ctlr->ring, i);
+ LOG(dprint("sdxen rsp %llud %d %d\n", rsp->id, rsp->operation, rsp->status);)
+ if (rsp->status == BLKIF_RSP_OKAY)
+ ctlr->iodone = 1;
+ else
+ ctlr->iodone = -1;
+ ctlr->ring.rsp_cons = ++i;
+ }
+ iunlock(&ctlr->ringlock);
+ if (ctlr->iodone)
+ wakeup(&ctlr->wiodone);
+}
+
+static Ctlr *kickctlr;
+
+static void
+kickme(void)
+{
+ Ctlr *ctlr = kickctlr;
+ shared_info_t *s;
+
+ if (ctlr) {
+ s = HYPERVISOR_shared_info;
+ dprint("tick %d %d prod %d cons %d pending %x mask %x\n",
+ m->ticks, ctlr->iodone, ctlr->ring.sring->rsp_prod, ctlr->ring.rsp_cons,
+ s->evtchn_pending[0], s->evtchn_mask[0]);
+ sdxenintr(0, ctlr);
+ }
+}
+
+static int
+xenonline(SDunit *unit)
+{
+ Ctlr *ctlr;
+
+ ctlr = ((Ctlr**)unit->dev->ctlr)[unit->subno];
+ unit->sectors = ctlr->sectors;
+ unit->secsize = ctlr->secsize;
+ if (ctlr->online == 0) {
+ intrenable(ctlr->evtchn, sdxenintr, ctlr, BUSUNKNOWN, "vbd");
+ //kickctlr = ctlr;
+ //addclock0link(kickme, 10000);
+ backendactivate(ctlr);
+ ctlr->online = 1;
+ }
+
+ return 1;
+}
+
+static int
+xenrio(SDreq*)
+{
+ return -1;
+}
+
+static long
+xenbio(SDunit* unit, int lun, int write, void* data, long nb, uvlong bno)
+{
+ Ctlr *ctlr;
+ char *buf;
+ long bcount, len;
+ int ref;
+ int n;
+
+ USED(lun); // XXX meaningless
+ ctlr = ((Ctlr**)unit->dev->ctlr)[unit->subno];
+ LOG(("xenbio %c %lux %ld %lld\n", write? 'w' : 'r', (ulong)data, nb, bno);)
+ buf = data;
+ // XXX extra copying & fragmentation could be avoided by
+ // redefining sdmalloc() to get page-aligned buffers
+ if ((ulong)data&(BY2PG-1))
+ buf = ctlr->frame;
+ bcount = BY2PG/unit->secsize;
+ qlock(&ctlr->iolock);
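+	/* transfer one page-sized chunk per request: grant the frame, send, sleep until the backend responds */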
+ for (n = nb; n > 0; n -= bcount) {
+ ref = shareframe(ctlr->backend, buf, !write);
+ if (bcount > n)
+ bcount = n;
+ len = bcount*unit->secsize;
+ if (write && buf == ctlr->frame)
+ memmove(buf, data, len);
+ ctlr->iodone = 0;
+ if (vbdsend(ctlr, write, ref, bcount, bno))
+ xenchannotify(ctlr->evtchn);
+ LOG(dprint("sleeping %d prod %d cons %d pending %x mask %x \n", ctlr->iodone, ctlr->ring.sring->rsp_prod, ctlr->ring.rsp_cons,
+ HYPERVISOR_shared_info->evtchn_pending[0], HYPERVISOR_shared_info->evtchn_mask[0]);)
+ sleep(&ctlr->wiodone, wiodone, ctlr);
+ xengrantend(ref);
+ if (ctlr->iodone < 0) {
+ qunlock(&ctlr->iolock);
+ return -1;
+ }
+ if (buf == ctlr->frame) {
+ if (!write)
+ memmove(data, buf, len);
+ data = (char*)data + len;
+ } else
+ buf += len;
+ bno += bcount;
+ }
+ qunlock(&ctlr->iolock);
+ return (nb-n)*unit->secsize;
+}
+
+static void
+xenclear(SDev *)
+{
+}
+
+SDifc sdxenifc = {
+ "xen", /* name */
+
+ xenpnp, /* pnp */
+ 0, /* legacy */
+ 0, /* enable */
+ 0, /* disable */
+
+ xenverify, /* verify */
+ xenonline, /* online */
+ xenrio, /* rio */
+ 0, /* rctl */
+ 0, /* wctl */
+
+ xenbio, /* bio */
+ 0, /* probe */
+ xenclear, /* clear */
+ 0, /* stat */
+};
--- /dev/null
+++ b/sys/src/9/xen/trap.c
@@ -1,0 +1,1108 @@
+#include "u.h"
+#include "tos.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "../port/error.h"
+#include <trace.h>
+
+#define INTRLOG(a)
+#define SETUPLOG(a)
+#define SYSCALLLOG(a)
+#define FAULTLOG(a)
+#define FAULTLOGFAST(a)
+#define POSTNOTELOG(a)
+#define TRAPLOG(a)
+
+int faultpanic = 0;
+
+enum {
+ /* trap_info_t flags */
+ SPL0 = 0,
+ SPL3 = 3,
+ EvDisable = 4,
+};
+
+void noted(Ureg*, ulong);
+
+static void debugbpt(Ureg*, void*);
+static void fault386(Ureg*, void*);
+static void safe_fault386(Ureg*, void*);
+static void doublefault(Ureg*, void*);
+static void unexpected(Ureg*, void*);
+static void _dumpstack(Ureg*);
+
+static Lock vctllock;
+static Vctl *vctl[256];
+
+enum
+{
+ Ntimevec = 20 /* number of time buckets for each intr */
+};
+ulong intrtimes[256][Ntimevec];
+
+void
+intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
+{
+ int vno;
+ Vctl *v;
+
+/**/
+ SETUPLOG(dprint("intrenable: irq %d, f %p, a %p, tbdf 0x%x, name %s\n",
+ irq, f, a, tbdf, name);)
+/**/
+ if(f == nil){
+ print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
+ irq, tbdf, name);
+ return;
+ }
+
+ v = xalloc(sizeof(Vctl));
+ v->isintr = 1;
+ v->irq = irq;
+ v->tbdf = tbdf;
+ v->f = f;
+ v->a = a;
+ strncpy(v->name, name, KNAMELEN-1);
+ v->name[KNAMELEN-1] = 0;
+
+ ilock(&vctllock);
+ vno = arch->intrenable(v);
+ if(vno == -1){
+ iunlock(&vctllock);
+ print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
+ irq, tbdf, v->name);
+ xfree(v);
+ return;
+ }
+ if(vctl[vno]){
+ if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
+ panic("intrenable: handler: %s %s %p %p %p %p\n",
+ vctl[vno]->name, v->name,
+ vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
+ v->next = vctl[vno];
+ }
+ vctl[vno] = v;
+ SETUPLOG(dprint("INTRENABLE: vctl[%d] is %p\n", vno, vctl[vno]);)
+ iunlock(&vctllock);
+}
+
+int
+intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
+{
+ Vctl **pv, *v;
+ int vno;
+
+ /*
+ * For now, none of this will work with the APIC code,
+ * there is no mapping between irq and vector as the IRQ
+ * is pretty meaningless.
+ */
+ if(arch->intrvecno == nil)
+ return -1;
+ vno = arch->intrvecno(irq);
+ ilock(&vctllock);
+ pv = &vctl[vno];
+ while (*pv &&
+ ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
+ strcmp((*pv)->name, name)))
+ pv = &((*pv)->next);
+ assert(*pv);
+
+ v = *pv;
+ *pv = (*pv)->next; /* Link out the entry */
+
+ if(vctl[vno] == nil && arch->intrdisable != nil)
+ arch->intrdisable(irq);
+ iunlock(&vctllock);
+ xfree(v);
+ return 0;
+}
+
+static long
+irqallocread(Chan*, void *vbuf, long n, vlong offset)
+{
+ char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
+ int m, vno;
+ long oldn;
+ Vctl *v;
+
+ if(n < 0 || offset < 0)
+ error(Ebadarg);
+
+ oldn = n;
+ buf = vbuf;
+ for(vno=0; vno<nelem(vctl); vno++){
+ for(v=vctl[vno]; v; v=v->next){
+ m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
+ if(m <= offset) /* if do not want this, skip entry */
+ offset -= m;
+ else{
+ /* skip offset bytes */
+ m -= offset;
+ p = str+offset;
+ offset = 0;
+
+ /* write at most max(n,m) bytes */
+ if(m > n)
+ m = n;
+ memmove(buf, p, m);
+ n -= m;
+ buf += m;
+
+ if(n == 0)
+ return oldn;
+ }
+ }
+ }
+ return oldn - n;
+}
+
+void
+trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
+{
+ Vctl *v;
+
+ if(vno < 0 || vno >= VectorPIC)
+ panic("trapenable: vno %d\n", vno);
+ v = xalloc(sizeof(Vctl));
+ v->tbdf = BUSUNKNOWN;
+ v->f = f;
+ v->a = a;
+ strncpy(v->name, name, KNAMELEN);
+ v->name[KNAMELEN-1] = 0;
+
+ lock(&vctllock);
+ if(vctl[vno])
+ v->next = vctl[vno]->next;
+ vctl[vno] = v;
+ unlock(&vctllock);
+}
+
+static void
+nmienable(void)
+{
+ /* leave this here in case plan 9 ever makes it to dom0 */
+#ifdef NOWAY
+ /*
+ * Hack: should be locked with NVRAM access.
+ */
+ outb(0x70, 0x80); /* NMI latch clear */
+ outb(0x70, 0);
+
+ x = inb(0x61) & 0x07; /* Enable NMI */
+ outb(0x61, 0x08|x);
+ outb(0x61, x);
+#endif
+}
+
+/* we started out doing the 'giant bulk init' for all traps.
+ * we're going to do them one-by-one since error analysis is
+ * so much easier that way.
+ */
+void
+trapinit(void)
+{
+ trap_info_t t[2];
+ ulong vaddr;
+ int v, flag;
+
+ HYPERVISOR_set_callbacks(
+ KESEL, (ulong)hypervisor_callback,
+ KESEL, (ulong)failsafe_callback);
+
+ /* XXX rework as single hypercall once debugged */
+ t[1].address = 0;
+ vaddr = (ulong)vectortable;
+ for(v = 0; v < 256; v++){
+ switch(v){
+ case VectorBPT:
+ case VectorSYSCALL:
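+			/* user mode (ring 3) must be able to raise breakpoints and system calls */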
+ flag = SPL3 | EvDisable;
+ break;
+ default:
+ flag = SPL0 | EvDisable;
+ break;
+ }
+ t[0] = (trap_info_t){ v, flag, KESEL, vaddr };
+ if(HYPERVISOR_set_trap_table(t) < 0)
+ panic("trapinit: FAIL: try to set: 0x%x, 0x%x, 0x%x, 0x%ulx\n",
+ t[0].vector, t[0].flags, t[0].cs, t[0].address);
+ vaddr += 6;
+ }
+
+ /*
+ * Special traps.
+ * Syscall() is called directly without going through trap().
+ */
+	trapenable(VectorBPT, debugbpt, 0, "debugbpt");
+ trapenable(VectorPF, fault386, 0, "fault386");
+ trapenable(Vector2F, doublefault, 0, "doublefault");
+ trapenable(Vector15, unexpected, 0, "unexpected");
+
+ nmienable();
+ addarchfile("irqalloc", 0444, irqallocread, nil);
+}
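+
+/*
+ * For reference, the 'giant bulk init' mentioned above would load the
+ * whole table with a single hypercall.  A minimal sketch, untested,
+ * assuming the same vectortable layout with 6-byte stubs; flagfor()
+ * is hypothetical shorthand for the switch in the loop above:
+ *
+ *	trap_info_t t[257];
+ *
+ *	for(v = 0; v < 256; v++)
+ *		t[v] = (trap_info_t){ v, flagfor(v), KESEL, (ulong)vectortable + 6*v };
+ *	t[256].address = 0;	// terminates the table
+ *	if(HYPERVISOR_set_trap_table(t) < 0)
+ *		panic("trapinit: bulk set_trap_table failed");
+ */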
+
+static char* excname[32] = {
+ "divide error",
+ "debug exception",
+ "nonmaskable interrupt",
+ "breakpoint",
+ "overflow",
+ "bounds check",
+ "invalid opcode",
+ "coprocessor not available",
+ "double fault",
+ "coprocessor segment overrun",
+ "invalid TSS",
+ "segment not present",
+ "stack exception",
+ "general protection violation",
+ "page fault",
+ "15 (reserved)",
+ "coprocessor error",
+ "alignment check",
+ "machine check",
+ "19 (reserved)",
+ "20 (reserved)",
+ "21 (reserved)",
+ "22 (reserved)",
+ "23 (reserved)",
+ "24 (reserved)",
+ "25 (reserved)",
+ "26 (reserved)",
+ "27 (reserved)",
+ "28 (reserved)",
+ "29 (reserved)",
+ "30 (reserved)",
+ "31 (reserved)",
+};
+
+/*
+ * keep histogram of interrupt service times
+ */
+void
+intrtime(Mach*, int vno)
+{
+ ulong diff;
+ ulong x;
+
+ x = perfticks();
+ diff = x - m->perf.intrts;
+ m->perf.intrts = x;
+
+ m->perf.inintr += diff;
+ if(up == nil && m->perf.inidle > diff)
+ m->perf.inidle -= diff;
+
+ diff /= m->cpumhz*100; // quantum = 100µsec
+ if(diff >= Ntimevec)
+ diff = Ntimevec-1;
+ intrtimes[vno][diff]++;
+}
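+
+/*
+ * Worked example with illustrative numbers: on a 2000MHz cpu a
+ * service time of 400000 ticks is 400000/(2000*100) = 2 quanta,
+ * i.e. roughly 200µs, so bucket 2 of intrtimes[vno] is incremented.
+ */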
+
+/* go to user space */
+void
+kexit(Ureg*)
+{
+ uvlong t;
+ Tos *tos;
+
+ /* precise time accounting, kernel exit */
+ tos = (Tos*)(USTKTOP-sizeof(Tos));
+ cycles(&t);
+ tos->kcycles += t - up->kentry;
+ tos->pcycles = up->pcycles;
+ tos->pid = up->pid;
+ INTRLOG(dprint("leave kexit, TOS %p\n", tos);)
+}
+
+/*
+ * All traps come here. It is slower to have all traps call trap()
+ * rather than directly vectoring the handler. However, this avoids a
+ * lot of code duplication and possible bugs. The only exception is
+ * VectorSYSCALL.
+ * Trap is called with interrupts (and events) disabled via interrupt-gates.
+ */
+void
+trap(Ureg* ureg)
+{
+ int clockintr, i, vno, user;
+ char buf[ERRMAX];
+ Vctl *ctl, *v;
+ Mach *mach;
+
+ TRAPLOG(dprint("trap ureg %lux %lux\n", (ulong*)ureg, ureg->trap);)
+ m->perf.intrts = perfticks();
+ user = (ureg->cs & 0xFFFF) == UESEL;
+ if(user){
+ up->dbgreg = ureg;
+ cycles(&up->kentry);
+ }
+
+ clockintr = 0;
+
+ vno = ureg->trap;
+ if(vno < 0 || vno >= 256)
+ panic("bad interrupt number %d\n", vno);
+ TRAPLOG(dprint("trap: vno is 0x%x, vctl[%d] is %p\n", vno, vno, vctl[vno]);)
+ if(ctl = vctl[vno]){
+ INTRLOG(dprint("ctl is %p, isintr is %d\n", ctl, ctl->isintr);)
+ if(ctl->isintr){
+ m->intr++;
+ if(vno >= VectorPIC && vno != VectorSYSCALL)
+ m->lastintr = ctl->irq;
+ }
+
+ INTRLOG(dprint("ctl %p, isr %p\n", ctl, ctl->isr);)
+ if(ctl->isr)
+ ctl->isr(vno);
+ for(v = ctl; v != nil; v = v->next){
+ INTRLOG(dprint("ctl %p, f is %p\n", v, v->f);)
+ if(v->f)
+ v->f(ureg, v->a);
+ }
+ INTRLOG(dprint("ctl %p, eoi %p\n", ctl, ctl->eoi);)
+ if(ctl->eoi)
+ ctl->eoi(vno);
+
+ if(ctl->isintr){
+ intrtime(m, vno);
+
+ //if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
+ if (ctl->tbdf != BUSUNKNOWN && ctl->irq == VIRQ_TIMER)
+ clockintr = 1;
+
+ if(up && !clockintr)
+ preempted();
+ }
+ }
+	else if(vno < nelem(excname) && user){
+ spllo();
+ sprint(buf, "sys: trap: %s", excname[vno]);
+ postnote(up, 1, buf, NDebug);
+ }
+ else if(vno >= VectorPIC && vno != VectorSYSCALL){
+ /*
+ * An unknown interrupt.
+ * Check for a default IRQ7. This can happen when
+ * the IRQ input goes away before the acknowledge.
+ * In this case, a 'default IRQ7' is generated, but
+ * the corresponding bit in the ISR isn't set.
+ * In fact, just ignore all such interrupts.
+ */
+
+ /* call all interrupt routines, just in case */
+ for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
+ ctl = vctl[i];
+ if(ctl == nil)
+ continue;
+ if(!ctl->isintr)
+ continue;
+ for(v = ctl; v != nil; v = v->next){
+ if(v->f)
+ v->f(ureg, v->a);
+ }
+ /* should we do this? */
+ if(ctl->eoi)
+ ctl->eoi(i);
+ }
+
+ iprint("cpu%d: spurious interrupt %d, last %d\n",
+ m->machno, vno, m->lastintr);
+ if(0)if(conf.nmach > 1){
+ for(i = 0; i < 32; i++){
+ if(!(active.machs & (1<<i)))
+ continue;
+ mach = MACHP(i);
+ if(m->machno == mach->machno)
+ continue;
+ print(" cpu%d: last %d",
+ mach->machno, mach->lastintr);
+ }
+ print("\n");
+ }
+ m->spuriousintr++;
+ if(user)
+ kexit(ureg);
+ return;
+ }
+ else{
+ if(vno == VectorNMI){
+ nmienable();
+ if(m->machno != 0){
+ print("cpu%d: PC %8.8luX\n",
+ m->machno, ureg->pc);
+ for(;;);
+ }
+ }
+ dumpregs(ureg);
+ if(!user){
+ ureg->sp = (ulong)&ureg->sp;
+ _dumpstack(ureg);
+ }
+ if(vno < nelem(excname))
+ panic("%s", excname[vno]);
+ panic("unknown trap/intr: %d\n", vno);
+ }
+ splhi();
+
+ /* delaysched set because we held a lock or because our quantum ended */
+ if(up && up->delaysched && clockintr){
+ INTRLOG(dprint("calling sched in trap? \n");)
+ sched();
+ INTRLOG(dprint("Back from calling sched in trap?\n");)
+ splhi();
+ }
+
+ if(user){
+ if(up->procctl || up->nnote)
+ notify(ureg);
+ kexit(ureg);
+ }
+
+ if (ureg->trap == 0xe) {
+ /*
+ * on page fault, we need to restore the old spl
+ * Xen won't do it for us.
+ * XXX verify this.
+ */
+ if (ureg->flags & 0x200)
+ spllo();
+ }
+}
+
+void
+dumpregs2(Ureg* ureg)
+{
+ if(up)
+ print("cpu%d: registers for %s %lud\n",
+ m->machno, up->text, up->pid);
+ else
+ print("cpu%d: registers for kernel\n", m->machno);
+ print("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
+ ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
+ print(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
+ print(" AX %8.8luX BX %8.8luX CX %8.8luX DX %8.8luX\n",
+ ureg->ax, ureg->bx, ureg->cx, ureg->dx);
+ print(" SI %8.8luX DI %8.8luX BP %8.8luX\n",
+ ureg->si, ureg->di, ureg->bp);
+ print(" CS %4.4luX DS %4.4luX ES %4.4luX FS %4.4luX GS %4.4luX\n",
+ ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
+ ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
+}
+
+void
+dumpregs(Ureg* ureg)
+{
+ extern ulong etext;
+
+ dumpregs2(ureg);
+
+ /*
+ * Processor control registers.
+ * If machine check exception, time stamp counter, page size extensions
+ * or enhanced virtual 8086 mode extensions are supported, there is a
+ * CR4. If there is a CR4 and machine check extensions, read the machine
+ * check address and machine check type registers if RDMSR supported.
+ */
+ print("SKIPPING get of crx and other such stuff.\n");/* */
+#ifdef NOT
+ print(" CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
+ getcr0(), getcr2(), getcr3());
+ if(m->cpuiddx & 0x9A){
+ print(" CR4 %8.8lux", getcr4());
+ if((m->cpuiddx & 0xA0) == 0xA0){
+ rdmsr(0x00, &mca);
+ rdmsr(0x01, &mct);
+ print("\n MCA %8.8llux MCT %8.8llux", mca, mct);
+ }
+ }
+#endif
+ print("\n ur %lux up %lux\n", (ulong)ureg, (ulong)up);
+}
+
+
+/*
+ * Fill in enough of Ureg to get a stack trace, and call a function.
+ * Used by debugging interface rdb.
+ */
+void
+callwithureg(void (*fn)(Ureg*))
+{
+ Ureg ureg;
+ ureg.pc = getcallerpc(&fn);
+ ureg.sp = (ulong)&fn;
+ fn(&ureg);
+}
+
+static void
+_dumpstack(Ureg *ureg)
+{
+ ulong l, v, i, estack;
+ extern ulong etext;
+ int x;
+
+ if(getconf("*nodumpstack")){
+ iprint("dumpstack disabled\n");
+ return;
+ }
+ iprint("dumpstack\n");
+ x = 0;
+ x += print("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
+ i = 0;
+ if(up
+ && (ulong)&l >= (ulong)up->kstack
+ && (ulong)&l <= (ulong)up->kstack+KSTACK)
+ estack = (ulong)up->kstack+KSTACK;
+ else if((ulong)&l >= (ulong)m->stack
+ && (ulong)&l <= (ulong)m+BY2PG)
+ estack = (ulong)m+MACHSIZE;
+ else
+ return;
+ x += print("estackx %.8lux\n", estack);
+
+ for(l=(ulong)&l; l<estack; l+=4){
+ v = *(ulong*)l;
+ if((KTZERO < v && v < (ulong)&etext) || estack-l<32){
+ /*
+			 * we could pick off a general CALL (((uchar*)v)[-5] == 0xE8)
+			 * or a CALL indirect through AX (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
+			 * but this is too clever and misses the faulting address.
+ */
+ x += print("%.8lux=%.8lux ", l, v);
+ i++;
+ }
+ if(i == 4){
+ i = 0;
+ x += print("\n");
+ }
+ }
+ if(i)
+ print("\n");
+ print("EOF\n");
+}
+
+void
+dumpstack(void)
+{
+ callwithureg(_dumpstack);
+}
+
+static void
+debugbpt(Ureg* ureg, void*)
+{
+ char buf[ERRMAX];
+ print("debugbpt\n");
+ if(up == 0)
+ panic("kernel bpt");
+ /* restore pc to instruction that caused the trap */
+ ureg->pc--;
+ sprint(buf, "sys: breakpoint");
+ postnote(up, 1, buf, NDebug);
+ print("debugbpt for proc %lud\n", up->pid);
+}
+
+static void
+doublefault(Ureg*, void*)
+{
+ panic("double fault");
+}
+
+static void
+unexpected(Ureg* ureg, void*)
+{
+ print("unexpected trap %lud; ignoring\n", ureg->trap);
+}
+
+static void
+fault386(Ureg* ureg, void* )
+{
+ ulong addr;
+ int read, user, n, insyscall;
+ char buf[ERRMAX];
+
+ addr = HYPERVISOR_shared_info->vcpu_info[m->machno].arch.cr2;
+ if (faultpanic) {
+ dprint("cr2 is 0x%lx\n", addr);
+ //dumpregs(ureg);
+ dumpstack();
+ panic("fault386");
+ exit(1);
+ }
+
+ user = (ureg->cs & 0xFFFF) == UESEL;
+ if(!user && mmukmapsync(addr))
+ return;
+ read = !(ureg->ecode & 2);
+ if(up == nil)
+ panic("fault but up is zero; pc 0x%8.8lux addr 0x%8.8lux\n", ureg->pc, addr);
+ insyscall = up->insyscall;
+ up->insyscall = 1;
+ n = fault(addr, read);
+ if(n < 0){
+ if(!user){
+ dumpregs(ureg);
+ panic("fault: 0x%lux\n", addr);
+ }
+ sprint(buf, "sys: trap: fault %s addr=0x%lux",
+ read? "read" : "write", addr);
+ dprint("Posting %s to %lud\n", buf, up->pid);
+ postnote(up, 1, buf, NDebug);
+ }
+ up->insyscall = insyscall;
+ FAULTLOG(dprint("fault386: all done\n");)
+}
+
+/*
+ * system calls
+ */
+#include "../port/systab.h"
+
+/*
+ * Syscall is called directly from assembler without going through trap().
+ */
+void
+syscall(Ureg* ureg)
+{
+ char *e;
+ ulong sp;
+ long ret;
+ int i, s;
+ ulong scallnr;
+
+	SYSCALLLOG(dprint("%lud: syscall ...#%lud(%s)\n",
+ up->pid, ureg->ax, sysctab[ureg->ax]);)
+
+ if((ureg->cs & 0xFFFF) != UESEL)
+ panic("syscall: cs 0x%4.4luX\n", ureg->cs);
+
+ cycles(&up->kentry);
+
+ m->syscall++;
+ up->insyscall = 1;
+ up->pc = ureg->pc;
+ up->dbgreg = ureg;
+
+ if(up->procctl == Proc_tracesyscall){
+ up->procctl = Proc_stopme;
+ procctl(up);
+ }
+
+ scallnr = ureg->ax;
+ up->scallnr = scallnr;
+ if(scallnr == RFORK && up->fpstate == FPactive){
+ fpsave(&up->fpsave);
+ up->fpstate = FPinactive;
+ }
+ spllo();
+
+ sp = ureg->usp;
+ up->nerrlab = 0;
+ ret = -1;
+ if(!waserror()){
+ if(scallnr >= nsyscall || systab[scallnr] == 0){
+ pprint("bad sys call number %lud pc %lux\n",
+ scallnr, ureg->pc);
+ postnote(up, 1, "sys: bad sys call", NDebug);
+ error(Ebadarg);
+ }
+
+ if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
+ validaddr(sp, sizeof(Sargs)+BY2WD, 0);
+
+ up->s = *((Sargs*)(sp+BY2WD));
+ up->psstate = sysctab[scallnr];
+
+ ret = systab[scallnr]((va_list)up->s.args);
+ poperror();
+ }else{
+ /* failure: save the error buffer for errstr */
+ e = up->syserrstr;
+ up->syserrstr = up->errstr;
+ up->errstr = e;
+ if(0 && up->pid == 1)
+ print("syscall %lud error %s\n", scallnr, up->syserrstr);
+ }
+ if(up->nerrlab){
+ print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
+ for(i = 0; i < NERR; i++)
+ print("sp=%lux pc=%lux\n",
+ up->errlab[i].sp, up->errlab[i].pc);
+ panic("error stack");
+ }
+
+	SYSCALLLOG(dprint("%lud: syscall %lud returns %ld, ureg %p\n", up->pid, scallnr, ret, ureg);)
+ /*
+ * Put return value in frame. On the x86 the syscall is
+ * just another trap and the return value from syscall is
+ * ignored. On other machines the return value is put into
+ * the results register by caller of syscall.
+ */
+ ureg->ax = ret;
+
+ if(up->procctl == Proc_tracesyscall){
+ up->procctl = Proc_stopme;
+ s = splhi();
+ procctl(up);
+ splx(s);
+ }
+
+ up->insyscall = 0;
+ up->psstate = 0;
+ INTRLOG(dprint("cleared insyscall\n");)
+ if(scallnr == NOTED)
+ noted(ureg, *(ulong*)(sp+BY2WD));
+
+ if(scallnr!=RFORK && (up->procctl || up->nnote)){
+ splhi();
+ notify(ureg);
+ }
+ /* if we delayed sched because we held a lock, sched now */
+ if(up->delaysched)
+ sched();
+ INTRLOG(dprint("before kexit\n");)
+ kexit(ureg);
+}
+
+/*
+ * Call user, if necessary, with note.
+ * Pass user the Ureg struct and the note on his stack.
+ */
+int
+notify(Ureg* ureg)
+{
+ int l;
+ ulong s, sp;
+ Note *n;
+
+ if(up->procctl)
+ procctl(up);
+ if(up->nnote == 0)
+ return 0;
+
+ if(up->fpstate == FPactive){
+ fpsave(&up->fpsave);
+ up->fpstate = FPinactive;
+ }
+ up->fpstate |= FPillegal;
+
+ s = spllo();
+ qlock(&up->debug);
+ up->notepending = 0;
+ n = &up->note[0];
+ if(strncmp(n->msg, "sys:", 4) == 0){
+ l = strlen(n->msg);
+ if(l > ERRMAX-15) /* " pc=0x12345678\0" */
+ l = ERRMAX-15;
+ sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
+ }
+
+ if(n->flag!=NUser && (up->notified || up->notify==0)){
+ if(n->flag == NDebug)
+ pprint("suicide: %s\n", n->msg);
+ qunlock(&up->debug);
+ pexit(n->msg, n->flag!=NDebug);
+ }
+
+ if(up->notified){
+ qunlock(&up->debug);
+ splhi();
+ return 0;
+ }
+
+ if(!up->notify){
+ qunlock(&up->debug);
+ pexit(n->msg, n->flag!=NDebug);
+ }
+ sp = ureg->usp;
+ sp -= sizeof(Ureg);
+
+ if(!okaddr((ulong)up->notify, 1, 0)
+ || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
+ pprint("suicide: bad address in notify\n");
+ qunlock(&up->debug);
+ pexit("Suicide", 0);
+ }
+
+ up->ureg = (void*)sp;
+ memmove((Ureg*)sp, ureg, sizeof(Ureg));
+ *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
+ up->ureg = (void*)sp;
+ sp -= BY2WD+ERRMAX;
+ memmove((char*)sp, up->note[0].msg, ERRMAX);
+ sp -= 3*BY2WD;
+ *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD; /* arg 2 is string */
+ *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg; /* arg 1 is ureg* */
+ *(ulong*)(sp+0*BY2WD) = 0; /* arg 0 is pc */
+ ureg->usp = sp;
+ ureg->pc = (ulong)up->notify;
+ up->notified = 1;
+ up->nnote--;
+ memmove(&up->lastnote, &up->note[0], sizeof(Note));
+ memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
+
+ qunlock(&up->debug);
+ splx(s);
+ return 1;
+}
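+
+/*
+ * The frame built above, from high to low addresses (a sketch of what
+ * the stores produce, not a new interface):
+ *	saved Ureg			<- up->ureg
+ *	old up->ureg			(the word under the Ureg)
+ *	note string (ERRMAX bytes)
+ *	arg 2: pointer to the string
+ *	arg 1: up->ureg
+ *	arg 0: 0 (fake return pc)	<- new usp
+ * The handler then runs at up->notify with those three arguments.
+ */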
+
+/*
+ * Return user to state before notify()
+ */
+void
+noted(Ureg* ureg, ulong arg0)
+{
+ Ureg *nureg;
+ ulong oureg, sp;
+
+ qlock(&up->debug);
+ if(arg0!=NRSTR && !up->notified) {
+ qunlock(&up->debug);
+ pprint("call to noted() when not notified\n");
+ pexit("Suicide", 0);
+ }
+ up->notified = 0;
+
+ nureg = up->ureg; /* pointer to user returned Ureg struct */
+
+ up->fpstate &= ~FPillegal;
+
+ /* sanity clause */
+ oureg = (ulong)nureg;
+ if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
+ pprint("bad ureg in noted or call to noted when not notified\n");
+ qunlock(&up->debug);
+ pexit("Suicide", 0);
+ }
+
+ /*
+ * Check the segment selectors are all valid, otherwise
+ * a fault will be taken on attempting to return to the
+ * user process.
+ * Take care with the comparisons as different processor
+ * generations push segment descriptors in different ways.
+ */
+ if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL
+ || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL
+ || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){
+ pprint("bad segment selector in noted\n");
+ pprint("cs is %#lux, wanted %#ux\n", nureg->cs, UESEL);
+ pprint("ds is %#lux, wanted %#ux\n", nureg->ds, UDSEL);
+ pprint("es is %#lux, fs is %#lux, gs %#lux, wanted %#ux\n",
+			nureg->es, nureg->fs, nureg->gs, UDSEL);
+ pprint("ss is %#lux, wanted %#ux\n", nureg->ss, UDSEL);
+ qunlock(&up->debug);
+ pexit("Suicide", 0);
+ }
+
+ /* don't let user change system flags */
+ nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
+
+ memmove(ureg, nureg, sizeof(Ureg));
+
+ switch(arg0){
+ case NCONT:
+ case NRSTR:
+ if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
+ qunlock(&up->debug);
+ pprint("suicide: trap in noted\n");
+ pexit("Suicide", 0);
+ }
+ up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
+ qunlock(&up->debug);
+ break;
+
+ case NSAVE:
+ if(!okaddr(nureg->pc, BY2WD, 0)
+ || !okaddr(nureg->usp, BY2WD, 0)){
+ qunlock(&up->debug);
+ pprint("suicide: trap in noted\n");
+ pexit("Suicide", 0);
+ }
+ qunlock(&up->debug);
+ sp = oureg-4*BY2WD-ERRMAX;
+ splhi();
+ ureg->sp = sp;
+ ((ulong*)sp)[1] = oureg; /* arg 1 0(FP) is ureg* */
+ ((ulong*)sp)[0] = 0; /* arg 0 is pc */
+ break;
+
+ default:
+ pprint("unknown noted arg 0x%lux\n", arg0);
+ up->lastnote.flag = NDebug;
+ /* fall through */
+
+ case NDFLT:
+ if(up->lastnote.flag == NDebug){
+ qunlock(&up->debug);
+ pprint("suicide: %s\n", up->lastnote.msg);
+ } else
+ qunlock(&up->debug);
+ pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
+ }
+}
+
+uintptr
+execregs(uintptr entry, ulong ssize, ulong nargs)
+{
+ ulong *sp;
+ Ureg *ureg;
+
+ up->fpstate = FPinit;
+ fpoff();
+
+ sp = (ulong*)(USTKTOP - ssize);
+ *--sp = nargs;
+
+ ureg = up->dbgreg;
+ ureg->usp = (ulong)sp;
+ ureg->pc = entry;
+// print("execregs returns 0x%x\n", USTKTOP-sizeof(Tos));
+ return USTKTOP-sizeof(Tos); /* address of kernel/user shared data */
+}
+
+/*
+ * return the userpc the last exception happened at
+ */
+ulong
+userpc(void)
+{
+ Ureg *ureg;
+
+ ureg = (Ureg*)up->dbgreg;
+ return ureg->pc;
+}
+
+/*
+ * This routine must save the values of registers the user is not permitted
+ * to write from devproc and then restore the saved values before returning.
+ */
+void
+setregisters(Ureg* ureg, char* pureg, char* uva, int n)
+{
+ ulong flags;
+ ulong cs;
+ ulong ss;
+
+ flags = ureg->flags;
+ cs = ureg->cs;
+ ss = ureg->ss;
+ memmove(pureg, uva, n);
+ ureg->flags = (ureg->flags & 0x00FF) | (flags & 0xFF00);
+ ureg->cs = cs;
+ ureg->ss = ss;
+}
+
+static void
+linkproc(void)
+{
+ spllo();
+ up->kpfun(up->kparg);
+ pexit("kproc dying", 0);
+}
+
+void
+kprocchild(Proc* p, void (*func)(void*), void* arg)
+{
+ /*
+ * gotolabel() needs a word on the stack in
+ * which to place the return PC used to jump
+ * to linkproc().
+ */
+ p->sched.pc = (ulong)linkproc;
+ p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
+
+ p->kpfun = func;
+ p->kparg = arg;
+}
+
+void
+forkchild(Proc *p, Ureg *ureg)
+{
+ Ureg *cureg;
+
+ /*
+ * Add 2*BY2WD to the stack to account for
+ * - the return PC
+ * - trap's argument (ur)
+ */
+ p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
+ p->sched.pc = (ulong)forkret;
+
+ cureg = (Ureg*)(p->sched.sp+2*BY2WD);
+ memmove(cureg, ureg, sizeof(Ureg));
+ /* return value of syscall in child */
+ cureg->ax = 0;
+
+ /* Things from bottom of syscall which were never executed */
+ p->psstate = 0;
+ p->insyscall = 0;
+}
+
+/*
+ * Give enough context in the ureg to produce a kernel stack for
+ * a sleeping process.
+ */
+void
+setkernur(Ureg* ureg, Proc* p)
+{
+ ureg->pc = p->sched.pc;
+ ureg->sp = p->sched.sp+4;
+}
+
+ulong
+dbgpc(Proc *p)
+{
+ Ureg *ureg;
+
+ ureg = p->dbgreg;
+ if(ureg == 0)
+ return 0;
+
+ return ureg->pc;
+}
+
+/*
+ * install_safe_pf_handler / install_normal_pf_handler:
+ *
+ * These are used within the failsafe_callback handler in entry.S to avoid
+ * taking a full page fault when reloading FS and GS. This is because FS and
+ * GS could be invalid at pretty much any point while the guest kernel executes (we
+ * don't set them to safe values on entry to the kernel). At *any* point Xen
+ * may be entered due to a hardware interrupt --- on exit from Xen an invalid
+ * FS/GS will cause our failsafe_callback to be executed. This could occur,
+ * for example, while the mmu_update_queue is in an inconsistent state. This
+ * is disastrous because the normal page-fault handler touches the update
+ * queue!
+ *
+ * Fortunately, within the failsafe handler it is safe to force DS/ES/FS/GS
+ * to zero if they cannot be reloaded -- at this point executing a normal
+ * page fault would not change this effect. The safe page-fault handler
+ * ensures this end result (blow away the selector value) without the dangers
+ * of the normal page-fault handler.
+ *
+ * NB. Perhaps this can all go away after we have implemented writeable
+ * page tables. :-)
+ */
+static void
+safe_fault386(Ureg*, void*)
+{
+	panic("DO SAFE PAGE FAULT!\n");
+}
+
+ulong
+install_safe_pf_handler(void)
+{
+	dprint("called from failsafe callback\n");
+	trapenable(VectorPF, safe_fault386, 0, "safe_fault386");
+	return 0;
+}
+
+void
+install_normal_pf_handler(ulong)
+{
+	trapenable(VectorPF, fault386, 0, "fault386");
+}
--- /dev/null
+++ b/sys/src/9/xen/uartxen.c
@@ -1,0 +1,334 @@
+/*
+ * uartxen.c
+ * Access to xen consoles.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../pc/io.h"
+
+extern PhysUart xenphysuart;
+
+static Uart xenuart = {
+ .name = "xencons",
+ .freq = 1843200,
+ .phys = &xenphysuart,
+};
+
+struct {
+ struct xencons_interface *intf;
+ int evtchn;
+ Lock txlock;
+} xencons;
+
+/*
+ * Debug print to xen "emergency console".
+ * Output only appears if xen is built with verbose=y
+ */
+void
+dprint(char *fmt, ...)
+{
+ int n;
+ va_list arg;
+ char buf[PRINTSIZE];
+
+ va_start(arg, fmt);
+ n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+ va_end(arg);
+ HYPERVISOR_console_io(CONSOLEIO_write, n, buf);
+}
+
+static void kick(Uart*);
+/*
+ * Emit a string to the guest OS console, bypassing the queue
+ * - before serialoq is initialised
+ * - when rdb is activated
+ * - from iprint() for messages from interrupt routines
+ * If ring is full, just throw extra output away.
+ */
+void
+xenuartputs(char *s, int n)
+{
+ struct xencons_interface *con = xencons.intf;
+ unsigned long prod;
+ int c;
+
+ ilock(&xencons.txlock);
+ prod = con->out_prod;
+ while (n-- > 0 && (prod - con->out_cons) < sizeof(con->out)) {
+ c = *s++;
+ /*
+ if (c == '\n')
+ con->out[MASK_XENCONS_IDX(prod++, con->out)] = '\r';
+ */
+ con->out[MASK_XENCONS_IDX(prod++, con->out)] = c;
+ }
+ coherence();
+ con->out_prod = prod;
+ xenchannotify(xencons.evtchn);
+ iunlock(&xencons.txlock);
+}
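+
+/*
+ * Ring discipline used above and by interrupt() below: out_prod and
+ * out_cons are free-running counters, so the ring is empty when
+ * prod == cons and full when prod - cons == sizeof(con->out).
+ * MASK_XENCONS_IDX reduces a counter to a buffer index; e.g. with a
+ * (hypothetical) 1024-byte ring, a prod of 1030 lands in slot 6.
+ * The coherence() barrier publishes the data before the index so the
+ * peer never reads stale bytes.
+ */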
+
+/*
+ * Handle channel event from console
+ */
+static void
+interrupt(Ureg*, void *arg)
+{
+ char c;
+ unsigned long cons;
+ Uart *uart;
+ struct xencons_interface *con = xencons.intf;
+
+ uart = &xenuart;
+
+ cons = con->in_cons;
+ coherence();
+ while (cons != con->in_prod) {
+ c = con->in[MASK_XENCONS_IDX(cons++, con->in)];
+ uartrecv(uart, c & 0xFF);
+ }
+ coherence();
+ con->in_cons = cons;
+ kick(nil);
+}
+
+static Uart*
+pnp(void)
+{
+ return &xenuart;
+}
+
+static void
+enable(Uart*, int ie)
+{
+ if(ie)
+ intrenable(xencons.evtchn, interrupt, 0, BUSUNKNOWN, "Xen console");
+}
+
+static void
+disable(Uart*)
+{
+}
+
+/*
+ * Send queued output to guest OS console
+ */
+static void
+kick(Uart*)
+{
+ struct xencons_interface *con = xencons.intf;
+ unsigned long prod;
+ long avail, idx, n, m;
+
+ ilock(&xencons.txlock);
+ prod = con->out_prod;
+ avail = sizeof(con->out) - (prod - con->out_cons);
+ while (avail > 0) {
+ idx = MASK_XENCONS_IDX(prod, con->out);
+ m = sizeof(con->out) - idx;
+ if (m > avail)
+ m = avail;
+ n = qconsume(serialoq, con->out+idx, m);
+ if (n < 0)
+ break;
+ prod += n;
+ avail -= n;
+ }
+ coherence();
+ con->out_prod = prod;
+ xenchannotify(xencons.evtchn);
+ iunlock(&xencons.txlock);
+}
+
+static void
+donothing(Uart*, int)
+{
+}
+
+static int
+donothingint(Uart*, int)
+{
+ return 0;
+}
+
+static int
+baud(Uart *uart, int n)
+{
+ if(n <= 0)
+ return -1;
+
+ uart->baud = n;
+ return 0;
+}
+
+static int
+bits(Uart *uart, int n)
+{
+ switch(n){
+ case 7:
+ case 8:
+ break;
+ default:
+ return -1;
+ }
+
+ uart->bits = n;
+ return 0;
+}
+
+static int
+stop(Uart *uart, int n)
+{
+ if(n != 1)
+ return -1;
+ uart->stop = n;
+ return 0;
+}
+
+static int
+parity(Uart *uart, int n)
+{
+ if(n != 'n')
+ return -1;
+ uart->parity = n;
+ return 0;
+}
+
+static long
+status(Uart *uart, void *buf, long n, long offset)
+{
+ char *p;
+
+ p = malloc(READSTR);
+ if(p == nil)
+ error(Enomem);
+ snprint(p, READSTR,
+ "b%d\n"
+ "dev(%d) type(%d) framing(%d) overruns(%d) "
+ "berr(%d) serr(%d)\n",
+
+ uart->baud,
+ uart->dev,
+ uart->type,
+ uart->ferr,
+ uart->oerr,
+ uart->berr,
+ uart->serr
+ );
+ n = readstr(offset, buf, n, p);
+ free(p);
+
+ return n;
+}
+
+void
+xenputc(Uart*, int c)
+{
+ struct xencons_interface *con = xencons.intf;
+ unsigned long prod;
+
+ c &= 0xFF;
+
+ ilock(&xencons.txlock);
+	/*
+	while(sizeof(con->out) - (con->out_prod - con->out_cons) < 2)
+		HYPERVISOR_yield();
+	*/
+	/* ring full (leave room for a possible CR)? drop the character */
+	if(sizeof(con->out) - (con->out_prod - con->out_cons) < 2){
+		iunlock(&xencons.txlock);
+		return;
+	}
+
+ prod = con->out_prod;
+
+ if((con->out[MASK_XENCONS_IDX(prod++, con->out)] = c) == '\n')
+ con->out[MASK_XENCONS_IDX(prod++, con->out)] = '\r';
+
+ coherence();
+ con->out_prod = prod;
+ xenchannotify(xencons.evtchn);
+ iunlock(&xencons.txlock);
+}
+
+int
+xengetc(Uart*)
+{
+ struct xencons_interface *con = xencons.intf;
+ char c;
+
+ c = 0;
+
+ if(con->in_cons != con->in_prod){
+ coherence();
+ c = con->in[MASK_XENCONS_IDX(con->in_cons++, con->in)];
+ if (con->in_cons == con->in_prod)
+ xenchannotify(xencons.evtchn);
+ }
+
+ return c;
+}
+
+PhysUart xenphysuart = {
+ .name = "xenuart",
+
+ .pnp = pnp,
+ .enable = enable,
+ .disable = disable,
+ .kick = kick,
+ .dobreak = donothing,
+ .baud = baud,
+ .bits = bits,
+ .stop = stop,
+ .parity = parity,
+ .modemctl = donothing,
+ .rts = donothing,
+ .dtr = donothing,
+ .status = status,
+ .fifo = donothing,
+
+ .getc = xengetc,
+ .putc = xenputc,
+};
+
+/* console=0 to enable */
+void
+xenconsinit(void)
+{
+ xencons.intf = (struct xencons_interface*)mmumapframe(XENCONSOLE, xenstart->console_mfn);
+ xencons.evtchn = xenstart->console_evtchn;
+
+ consuart = &xenuart;
+}
+
+void
+kbdenable(void)
+{
+ Uart *uart;
+ int n;
+ char *p, *cmd;
+
+ if((p = getconf("console")) == nil)
+ return;
+ n = strtoul(p, &cmd, 0);
+ if(p == cmd || n != 0)
+ return;
+ uart = &xenuart;
+
+ (*uart->phys->enable)(uart, 0);
+ uartctl(uart, "b9600 l8 pn s1");
+ if(*cmd != '\0')
+ uartctl(uart, cmd);
+
+ consuart = uart;
+ uart->console = 1;
+
+ uartputs("CONSOLE1\n", 9);
+
+ //*(char*)0 = 0;
+}
+
--- /dev/null
+++ b/sys/src/9/xen/xen-public/COPYING
@@ -1,0 +1,38 @@
+XEN NOTICE
+==========
+
+This copyright applies to all files within this subdirectory and its
+subdirectories:
+ include/public/*.h
+ include/public/hvm/*.h
+ include/public/io/*.h
+
+The intention is that these files can be freely copied into the source
+tree of an operating system when porting that OS to run on Xen. Doing
+so does *not* cause the OS to become subject to the terms of the GPL.
+
+All other files in the Xen source distribution are covered by version
+2 of the GNU General Public License except where explicitly stated
+otherwise within individual source files.
+
+ -- Keir Fraser (on behalf of the Xen team)
+
+=====================================================================
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-arm.h
@@ -1,0 +1,252 @@
+/******************************************************************************
+ * arch-arm.h
+ *
+ * Guest OS interface to ARM Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright 2011 (C) Citrix Systems
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_ARM_H__
+#define __XEN_PUBLIC_ARCH_ARM_H__
+
+/* hypercall calling convention
+ * ----------------------------
+ *
+ * A hypercall is issued using the ARM HVC instruction.
+ *
+ * A hypercall can take up to 5 arguments. These are passed in
+ * registers, the first argument in x0/r0 (for arm64/arm32 guests
+ * respectively irrespective of whether the underlying hypervisor is
+ * 32- or 64-bit), the second argument in x1/r1, the third in x2/r2,
+ * the fourth in x3/r3 and the fifth in x4/r4.
+ *
+ * The hypercall number is passed in r12 (arm) or x16 (arm64). In both
+ * cases the relevant ARM procedure calling convention specifies this
+ * is an inter-procedure-call scratch register (e.g. for use in linker
+ * stubs). This use does not conflict with use during a hypercall.
+ *
+ * The HVC ISS must contain a Xen specific TAG: XEN_HYPERCALL_TAG.
+ *
+ * The return value is in x0/r0.
+ *
+ * The hypercall will clobber x16/r12 and the argument registers used
+ * by that hypercall (except r0 which is the return value) i.e. in
+ * addition to x16/r12 a 2 argument hypercall will clobber x1/r1 and a
+ * 4 argument hypercall will clobber x1/r1, x2/r2 and x3/r3.
+ *
+ * Parameter structs passed to hypercalls are laid out according to
+ * the Procedure Call Standard for the ARM Architecture (AAPCS, AKA
+ * EABI) and Procedure Call Standard for the ARM 64-bit Architecture
+ * (AAPCS64). Where there is a conflict the 64-bit standard should be
+ * used regardless of guest type. Structures which are passed as
+ * hypercall arguments are always little endian.
+ */
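+
+/*
+ * Illustrative only (not part of the interface text above): a
+ * two-argument hypercall issued from a 32-bit guest under this
+ * convention, with hypothetical NR/ARG values:
+ *
+ *	mov	r12, #NR		@ hypercall number
+ *	mov	r0, #ARG1		@ first argument
+ *	mov	r1, #ARG2		@ second argument
+ *	hvc	#XEN_HYPERCALL_TAG	@ enter Xen
+ *	@ result is now in r0; r1 and r12 are clobbered
+ */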
+
+#define XEN_HYPERCALL_TAG 0XEA1
+
+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
+
+#ifndef __ASSEMBLY__
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+ typedef union { type *p; unsigned long q; } \
+ __guest_handle_ ## name; \
+ typedef union { type *p; uint64_aligned_t q; } \
+ __guest_handle_64_ ## name;
+
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
+ * in a struct in memory. On ARM it is always 8 bytes in size and
+ * 8-byte aligned.
+ * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
+ * hypercall argument. It is 4 bytes on aarch32 and 8 bytes on aarch64.
+ */
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+ ___DEFINE_XEN_GUEST_HANDLE(name, type); \
+ ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define __XEN_GUEST_HANDLE(name) __guest_handle_64_ ## name
+#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name)
+/* this is going to be changed on 64 bit */
+#define XEN_GUEST_HANDLE_PARAM(name) __guest_handle_ ## name
+#define set_xen_guest_handle_raw(hnd, val) \
+ do { \
+ typeof(&(hnd)) _sxghr_tmp = &(hnd); \
+ _sxghr_tmp->q = 0; \
+ _sxghr_tmp->p = val; \
+ } while ( 0 )
+#ifdef __XEN_TOOLS__
+#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
+#endif
+#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
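+
+/*
+ * Typical use, with a hypothetical handle-typed field:
+ *	xen_pfn_t *frames = ...;
+ *	set_xen_guest_handle(op.frame_list, frames);
+ * Zeroing q before storing p keeps the unused upper half of the
+ * 64-bit union deterministic when a 32-bit guest fills it in.
+ */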
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+/* Anonymous union includes both 32- and 64-bit names (e.g., r0/x0). */
+# define __DECL_REG(n64, n32) union { \
+ uint64_t n64; \
+ uint32_t n32; \
+ }
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., x0). */
+#define __DECL_REG(n64, n32) uint64_t n64
+#endif
+
+struct vcpu_guest_core_regs
+{
+ /* Aarch64 Aarch32 */
+ __DECL_REG(x0, r0_usr);
+ __DECL_REG(x1, r1_usr);
+ __DECL_REG(x2, r2_usr);
+ __DECL_REG(x3, r3_usr);
+ __DECL_REG(x4, r4_usr);
+ __DECL_REG(x5, r5_usr);
+ __DECL_REG(x6, r6_usr);
+ __DECL_REG(x7, r7_usr);
+ __DECL_REG(x8, r8_usr);
+ __DECL_REG(x9, r9_usr);
+ __DECL_REG(x10, r10_usr);
+ __DECL_REG(x11, r11_usr);
+ __DECL_REG(x12, r12_usr);
+
+ __DECL_REG(x13, sp_usr);
+ __DECL_REG(x14, lr_usr);
+
+ __DECL_REG(x15, __unused_sp_hyp);
+
+ __DECL_REG(x16, lr_irq);
+ __DECL_REG(x17, sp_irq);
+
+ __DECL_REG(x18, lr_svc);
+ __DECL_REG(x19, sp_svc);
+
+ __DECL_REG(x20, lr_abt);
+ __DECL_REG(x21, sp_abt);
+
+ __DECL_REG(x22, lr_und);
+ __DECL_REG(x23, sp_und);
+
+ __DECL_REG(x24, r8_fiq);
+ __DECL_REG(x25, r9_fiq);
+ __DECL_REG(x26, r10_fiq);
+ __DECL_REG(x27, r11_fiq);
+ __DECL_REG(x28, r12_fiq);
+
+ __DECL_REG(x29, sp_fiq);
+ __DECL_REG(x30, lr_fiq);
+
+ /* Return address and mode */
+ __DECL_REG(pc64, pc32); /* ELR_EL2 */
+ uint32_t cpsr; /* SPSR_EL2 */
+
+ union {
+ uint32_t spsr_el1; /* AArch64 */
+ uint32_t spsr_svc; /* AArch32 */
+ };
+
+ /* AArch32 guests only */
+ uint32_t spsr_fiq, spsr_irq, spsr_und, spsr_abt;
+
+ /* AArch64 guests only */
+ uint64_t sp_el0;
+ uint64_t sp_el1, elr_el1;
+};
+typedef struct vcpu_guest_core_regs vcpu_guest_core_regs_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_core_regs_t);
+
+#undef __DECL_REG
+
+typedef uint64_t xen_pfn_t;
+#define PRI_xen_pfn PRIx64
+
+/* Maximum number of virtual CPUs in legacy multi-processor guests. */
+/* Only one. All other VCPUS must use VCPUOP_register_vcpu_info */
+#define XEN_LEGACY_MAX_VCPUS 1
+
+typedef uint64_t xen_ulong_t;
+#define PRI_xen_ulong PRIx64
+
+struct vcpu_guest_context {
+#define _VGCF_online 0
+#define VGCF_online (1<<_VGCF_online)
+ uint32_t flags; /* VGCF_* */
+
+ struct vcpu_guest_core_regs user_regs; /* Core CPU registers */
+
+ uint32_t sctlr, ttbcr;
+ uint64_t ttbr0, ttbr1;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
+
+struct arch_vcpu_info { };
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+struct arch_shared_info { };
+typedef struct arch_shared_info arch_shared_info_t;
+typedef uint64_t xen_callback_t;
+
+#endif /* ifndef __ASSEMBLY__ */
+
+/* PSR bits (CPSR, SPSR)*/
+
+/* 32 bit modes */
+#define PSR_MODE_USR 0x10
+#define PSR_MODE_FIQ 0x11
+#define PSR_MODE_IRQ 0x12
+#define PSR_MODE_SVC 0x13
+#define PSR_MODE_MON 0x16
+#define PSR_MODE_ABT 0x17
+#define PSR_MODE_HYP 0x1a
+#define PSR_MODE_UND 0x1b
+#define PSR_MODE_SYS 0x1f
+
+/* 64 bit modes */
+#ifdef __aarch64__
+#define PSR_MODE_BIT 0x10 /* Set iff AArch32 */
+#define PSR_MODE_EL3h 0x0d
+#define PSR_MODE_EL3t 0x0c
+#define PSR_MODE_EL2h 0x09
+#define PSR_MODE_EL2t 0x08
+#define PSR_MODE_EL1h 0x05
+#define PSR_MODE_EL1t 0x04
+#define PSR_MODE_EL0t 0x00
+#endif
+
+#define PSR_THUMB (1<<5) /* Thumb Mode enable */
+#define PSR_FIQ_MASK (1<<6) /* Fast Interrupt mask */
+#define PSR_IRQ_MASK (1<<7) /* Interrupt mask */
+#define PSR_ABT_MASK (1<<8) /* Asynchronous Abort mask */
+#define PSR_BIG_ENDIAN (1<<9) /* Big Endian Mode */
+#define PSR_IT_MASK (0x0600fc00) /* Thumb If-Then Mask */
+#define PSR_JAZELLE (1<<24) /* Jazelle Mode */
+
+#define PSR_GUEST_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC)
+
+#endif /* __XEN_PUBLIC_ARCH_ARM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-arm/hvm/save.h
@@ -1,0 +1,39 @@
+/*
+ * Structure definitions for HVM state that is held by Xen and must
+ * be saved along with the domain's memory and device-model state.
+ *
+ * Copyright (c) 2012 Citrix Systems Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_SAVE_ARM_H__
+#define __XEN_PUBLIC_HVM_SAVE_ARM_H__
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/cpuid.h
@@ -1,0 +1,68 @@
+/******************************************************************************
+ * arch-x86/cpuid.h
+ *
+ * CPUID interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2007 Citrix Systems, Inc.
+ *
+ * Authors:
+ * Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__
+#define __XEN_PUBLIC_ARCH_X86_CPUID_H__
+
+/* Xen identification leaves start at 0x40000000. */
+#define XEN_CPUID_FIRST_LEAF 0x40000000
+#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i))
+
+/*
+ * Leaf 1 (0x40000000)
+ * EAX: Largest Xen-information leaf. All leaves up to and including @EAX
+ * are supported by the Xen host.
+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification
+ * of a Xen host.
+ */
+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
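+
+/*
+ * Illustrative detection sketch (cpuid() is a hypothetical helper
+ * returning EAX-EDX for the given leaf):
+ *	cpuid(XEN_CPUID_FIRST_LEAF, &eax, &ebx, &ecx, &edx);
+ *	if(ebx == XEN_CPUID_SIGNATURE_EBX && ecx == XEN_CPUID_SIGNATURE_ECX
+ *	&& edx == XEN_CPUID_SIGNATURE_EDX)
+ *		...running on Xen; eax is the largest Xen leaf...
+ */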
+
+/*
+ * Leaf 2 (0x40000001)
+ * EAX[31:16]: Xen major version.
+ * EAX[15: 0]: Xen minor version.
+ * EBX-EDX: Reserved (currently all zeroes).
+ */
+
+/*
+ * Leaf 3 (0x40000002)
+ * EAX: Number of hypercall transfer pages. This register is always guaranteed
+ * to specify one hypercall page.
+ * EBX: Base address of Xen-specific MSRs.
+ * ECX: Features 1. Unused bits are set to zero.
+ * EDX: Features 2. Unused bits are set to zero.
+ */
+
+/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */
+#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
+#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0)
+
+#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/hvm/save.h
@@ -1,0 +1,600 @@
+/*
+ * Structure definitions for HVM state that is held by Xen and must
+ * be saved along with the domain's memory and device-model state.
+ *
+ * Copyright (c) 2007 XenSource Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_SAVE_X86_H__
+#define __XEN_PUBLIC_HVM_SAVE_X86_H__
+
+/*
+ * Save/restore header: general info about the save file.
+ */
+
+#define HVM_FILE_MAGIC 0x54381286
+#define HVM_FILE_VERSION 0x00000001
+
+struct hvm_save_header {
+ uint32_t magic; /* Must be HVM_FILE_MAGIC */
+ uint32_t version; /* File format version */
+ uint64_t changeset; /* Version of Xen that saved this file */
+ uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */
+ uint32_t gtsc_khz; /* Guest's TSC frequency in kHz */
+};
+
+DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
+
+
+/*
+ * Processor
+ *
+ * Compat: Pre-3.4 didn't have msr_tsc_aux
+ */
+
+struct hvm_hw_cpu {
+ uint8_t fpu_regs[512];
+
+ uint64_t rax;
+ uint64_t rbx;
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rbp;
+ uint64_t rsi;
+ uint64_t rdi;
+ uint64_t rsp;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+
+ uint64_t rip;
+ uint64_t rflags;
+
+ uint64_t cr0;
+ uint64_t cr2;
+ uint64_t cr3;
+ uint64_t cr4;
+
+ uint64_t dr0;
+ uint64_t dr1;
+ uint64_t dr2;
+ uint64_t dr3;
+ uint64_t dr6;
+ uint64_t dr7;
+
+ uint32_t cs_sel;
+ uint32_t ds_sel;
+ uint32_t es_sel;
+ uint32_t fs_sel;
+ uint32_t gs_sel;
+ uint32_t ss_sel;
+ uint32_t tr_sel;
+ uint32_t ldtr_sel;
+
+ uint32_t cs_limit;
+ uint32_t ds_limit;
+ uint32_t es_limit;
+ uint32_t fs_limit;
+ uint32_t gs_limit;
+ uint32_t ss_limit;
+ uint32_t tr_limit;
+ uint32_t ldtr_limit;
+ uint32_t idtr_limit;
+ uint32_t gdtr_limit;
+
+ uint64_t cs_base;
+ uint64_t ds_base;
+ uint64_t es_base;
+ uint64_t fs_base;
+ uint64_t gs_base;
+ uint64_t ss_base;
+ uint64_t tr_base;
+ uint64_t ldtr_base;
+ uint64_t idtr_base;
+ uint64_t gdtr_base;
+
+ uint32_t cs_arbytes;
+ uint32_t ds_arbytes;
+ uint32_t es_arbytes;
+ uint32_t fs_arbytes;
+ uint32_t gs_arbytes;
+ uint32_t ss_arbytes;
+ uint32_t tr_arbytes;
+ uint32_t ldtr_arbytes;
+
+ uint64_t sysenter_cs;
+ uint64_t sysenter_esp;
+ uint64_t sysenter_eip;
+
+ /* msr for em64t */
+ uint64_t shadow_gs;
+
+ /* msr content saved/restored. */
+ uint64_t msr_flags;
+ uint64_t msr_lstar;
+ uint64_t msr_star;
+ uint64_t msr_cstar;
+ uint64_t msr_syscall_mask;
+ uint64_t msr_efer;
+ uint64_t msr_tsc_aux;
+
+ /* guest's idea of what rdtsc() would return */
+ uint64_t tsc;
+
+ /* pending event, if any */
+ union {
+ uint32_t pending_event;
+ struct {
+ uint8_t pending_vector:8;
+ uint8_t pending_type:3;
+ uint8_t pending_error_valid:1;
+ uint32_t pending_reserved:19;
+ uint8_t pending_valid:1;
+ };
+ };
+ /* error code for pending event */
+ uint32_t error_code;
+};
+
+struct hvm_hw_cpu_compat {
+ uint8_t fpu_regs[512];
+
+ uint64_t rax;
+ uint64_t rbx;
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rbp;
+ uint64_t rsi;
+ uint64_t rdi;
+ uint64_t rsp;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+
+ uint64_t rip;
+ uint64_t rflags;
+
+ uint64_t cr0;
+ uint64_t cr2;
+ uint64_t cr3;
+ uint64_t cr4;
+
+ uint64_t dr0;
+ uint64_t dr1;
+ uint64_t dr2;
+ uint64_t dr3;
+ uint64_t dr6;
+ uint64_t dr7;
+
+ uint32_t cs_sel;
+ uint32_t ds_sel;
+ uint32_t es_sel;
+ uint32_t fs_sel;
+ uint32_t gs_sel;
+ uint32_t ss_sel;
+ uint32_t tr_sel;
+ uint32_t ldtr_sel;
+
+ uint32_t cs_limit;
+ uint32_t ds_limit;
+ uint32_t es_limit;
+ uint32_t fs_limit;
+ uint32_t gs_limit;
+ uint32_t ss_limit;
+ uint32_t tr_limit;
+ uint32_t ldtr_limit;
+ uint32_t idtr_limit;
+ uint32_t gdtr_limit;
+
+ uint64_t cs_base;
+ uint64_t ds_base;
+ uint64_t es_base;
+ uint64_t fs_base;
+ uint64_t gs_base;
+ uint64_t ss_base;
+ uint64_t tr_base;
+ uint64_t ldtr_base;
+ uint64_t idtr_base;
+ uint64_t gdtr_base;
+
+ uint32_t cs_arbytes;
+ uint32_t ds_arbytes;
+ uint32_t es_arbytes;
+ uint32_t fs_arbytes;
+ uint32_t gs_arbytes;
+ uint32_t ss_arbytes;
+ uint32_t tr_arbytes;
+ uint32_t ldtr_arbytes;
+
+ uint64_t sysenter_cs;
+ uint64_t sysenter_esp;
+ uint64_t sysenter_eip;
+
+ /* msr for em64t */
+ uint64_t shadow_gs;
+
+ /* msr content saved/restored. */
+ uint64_t msr_flags;
+ uint64_t msr_lstar;
+ uint64_t msr_star;
+ uint64_t msr_cstar;
+ uint64_t msr_syscall_mask;
+ uint64_t msr_efer;
+ /*uint64_t msr_tsc_aux; COMPAT */
+
+ /* guest's idea of what rdtsc() would return */
+ uint64_t tsc;
+
+ /* pending event, if any */
+ union {
+ uint32_t pending_event;
+ struct {
+ uint8_t pending_vector:8;
+ uint8_t pending_type:3;
+ uint8_t pending_error_valid:1;
+ uint32_t pending_reserved:19;
+ uint8_t pending_valid:1;
+ };
+ };
+ /* error code for pending event */
+ uint32_t error_code;
+};
+
+static inline int _hvm_hw_fix_cpu(void *h) {
+
+ union hvm_hw_cpu_union {
+ struct hvm_hw_cpu nat;
+ struct hvm_hw_cpu_compat cmp;
+ } *ucpu = (union hvm_hw_cpu_union *)h;
+
+ /* If we copy from the end backwards, we should
+ * be able to do the modification in-place */
+ ucpu->nat.error_code = ucpu->cmp.error_code;
+ ucpu->nat.pending_event = ucpu->cmp.pending_event;
+ ucpu->nat.tsc = ucpu->cmp.tsc;
+ ucpu->nat.msr_tsc_aux = 0;
+
+ return 0;
+}
+
+DECLARE_HVM_SAVE_TYPE_COMPAT(CPU, 2, struct hvm_hw_cpu, \
+ struct hvm_hw_cpu_compat, _hvm_hw_fix_cpu);
+
+/*
+ * PIC
+ */
+
+struct hvm_hw_vpic {
+ /* IR line bitmasks. */
+ uint8_t irr;
+ uint8_t imr;
+ uint8_t isr;
+
+ /* Line IRx maps to IRQ irq_base+x */
+ uint8_t irq_base;
+
+ /*
+ * Where are we in ICW2-4 initialisation (0 means no init in progress)?
+ * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1).
+ * Bit 2: ICW1.IC4 (1 == ICW4 included in init sequence)
+ * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence)
+ */
+ uint8_t init_state:4;
+
+ /* IR line with highest priority. */
+ uint8_t priority_add:4;
+
+ /* Reads from A=0 obtain ISR or IRR? */
+ uint8_t readsel_isr:1;
+
+ /* Reads perform a polling read? */
+ uint8_t poll:1;
+
+ /* Automatically clear IRQs from the ISR during INTA? */
+ uint8_t auto_eoi:1;
+
+ /* Automatically rotate IRQ priorities during AEOI? */
+ uint8_t rotate_on_auto_eoi:1;
+
+ /* Exclude slave inputs when considering in-service IRQs? */
+ uint8_t special_fully_nested_mode:1;
+
+ /* Special mask mode excludes masked IRs from AEOI and priority checks. */
+ uint8_t special_mask_mode:1;
+
+ /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */
+ uint8_t is_master:1;
+
+ /* Edge/trigger selection. */
+ uint8_t elcr;
+
+ /* Virtual INT output. */
+ uint8_t int_output;
+};
+
+DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic);
+
+
+/*
+ * IO-APIC
+ */
+
+#define VIOAPIC_NUM_PINS 48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */
+
+struct hvm_hw_vioapic {
+ uint64_t base_address;
+ uint32_t ioregsel;
+ uint32_t id;
+ union vioapic_redir_entry
+ {
+ uint64_t bits;
+ struct {
+ uint8_t vector;
+ uint8_t delivery_mode:3;
+ uint8_t dest_mode:1;
+ uint8_t delivery_status:1;
+ uint8_t polarity:1;
+ uint8_t remote_irr:1;
+ uint8_t trig_mode:1;
+ uint8_t mask:1;
+ uint8_t reserve:7;
+ uint8_t reserved[4];
+ uint8_t dest_id;
+ } fields;
+ } redirtbl[VIOAPIC_NUM_PINS];
+};
+
+DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic);
+
+
+/*
+ * LAPIC
+ */
+
+struct hvm_hw_lapic {
+ uint64_t apic_base_msr;
+ uint32_t disabled; /* VLAPIC_xx_DISABLED */
+ uint32_t timer_divisor;
+ uint64_t tdt_msr;
+};
+
+DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic);
+
+struct hvm_hw_lapic_regs {
+ uint8_t data[1024];
+};
+
+DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs);
+
+
+/*
+ * IRQs
+ */
+
+struct hvm_hw_pci_irqs {
+ /*
+ * Virtual interrupt wires for a single PCI bus.
+ * Indexed by: device*4 + INTx#.
+ */
+ union {
+ unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */
+ uint64_t pad[2];
+ };
+};
+
+DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs);
+
+struct hvm_hw_isa_irqs {
+ /*
+ * Virtual interrupt wires for ISA devices.
+ * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing).
+ */
+ union {
+ unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */
+ uint64_t pad[1];
+ };
+};
+
+DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs);
+
+struct hvm_hw_pci_link {
+ /*
+ * PCI-ISA interrupt router.
+ * Each PCI <device:INTx#> is 'wire-ORed' into one of four links using
+ * the traditional 'barber's pole' mapping ((device + INTx#) & 3).
+ * The router provides a programmable mapping from each link to a GSI.
+ */
+ uint8_t route[4];
+ uint8_t pad0[4];
+};
+
+DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link);
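+
+/*
+ * Example of the mapping above: device 3 asserting INTB (INTx# = 1)
+ * is wire-ORed into link (3 + 1) & 3 = 0, so its GSI is route[0].
+ */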
+
+/*
+ * PIT
+ */
+
+struct hvm_hw_pit {
+ struct hvm_hw_pit_channel {
+ uint32_t count; /* can be 65536 */
+ uint16_t latched_count;
+ uint8_t count_latched;
+ uint8_t status_latched;
+ uint8_t status;
+ uint8_t read_state;
+ uint8_t write_state;
+ uint8_t write_latch;
+ uint8_t rw_mode;
+ uint8_t mode;
+ uint8_t bcd; /* not supported */
+ uint8_t gate; /* timer start */
+ } channels[3]; /* 3 x 16 bytes */
+ uint32_t speaker_data_on;
+ uint32_t pad0;
+};
+
+DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit);
+
+
+/*
+ * RTC
+ */
+
+#define RTC_CMOS_SIZE 14
+struct hvm_hw_rtc {
+ /* CMOS bytes */
+ uint8_t cmos_data[RTC_CMOS_SIZE];
+ /* Index register for 2-part operations */
+ uint8_t cmos_index;
+ uint8_t pad0;
+};
+
+DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc);
+
+
+/*
+ * HPET
+ */
+
+#define HPET_TIMER_NUM 3 /* 3 timers supported now */
+struct hvm_hw_hpet {
+ /* Memory-mapped, software visible registers */
+ uint64_t capability; /* capabilities */
+ uint64_t res0; /* reserved */
+ uint64_t config; /* configuration */
+ uint64_t res1; /* reserved */
+ uint64_t isr; /* interrupt status reg */
+ uint64_t res2[25]; /* reserved */
+ uint64_t mc64; /* main counter */
+ uint64_t res3; /* reserved */
+ struct { /* timers */
+ uint64_t config; /* configuration/cap */
+ uint64_t cmp; /* comparator */
+ uint64_t fsb; /* FSB route, not supported now */
+ uint64_t res4; /* reserved */
+ } timers[HPET_TIMER_NUM];
+ uint64_t res5[4*(24-HPET_TIMER_NUM)]; /* reserved, up to 0x3ff */
+
+ /* Hidden register state */
+ uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+};
+
+DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet);
+
+
+/*
+ * PM timer
+ */
+
+struct hvm_hw_pmtimer {
+ uint32_t tmr_val; /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */
+ uint16_t pm1a_sts; /* PM1a_EVT_BLK.PM1a_STS: status register */
+ uint16_t pm1a_en; /* PM1a_EVT_BLK.PM1a_EN: enable register */
+};
+
+DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer);
+
+/*
+ * MTRR MSRs
+ */
+
+struct hvm_hw_mtrr {
+#define MTRR_VCNT 8
+#define NUM_FIXED_MSR 11
+ uint64_t msr_pat_cr;
+ /* mtrr physbase & physmask msr pair*/
+ uint64_t msr_mtrr_var[MTRR_VCNT*2];
+ uint64_t msr_mtrr_fixed[NUM_FIXED_MSR];
+ uint64_t msr_mtrr_cap;
+ uint64_t msr_mtrr_def_type;
+};
+
+DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr);
+
+/*
+ * The save area of XSAVE/XRSTOR.
+ */
+
+struct hvm_hw_cpu_xsave {
+ uint64_t xfeature_mask;
+ uint64_t xcr0; /* Updated by XSETBV */
+ uint64_t xcr0_accum; /* Updated by XSETBV */
+ struct {
+ struct { char x[512]; } fpu_sse;
+
+ struct {
+ uint64_t xstate_bv; /* Updated by XRSTOR */
+ uint64_t reserved[7];
+ } xsave_hdr; /* The 64-byte header */
+
+ struct { char x[0]; } ymm; /* YMM */
+ } save_area;
+};
+
+#define CPU_XSAVE_CODE 16
+
+/*
+ * Viridian hypervisor context.
+ */
+
+struct hvm_viridian_domain_context {
+ uint64_t hypercall_gpa;
+ uint64_t guest_os_id;
+};
+
+DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context);
+
+struct hvm_viridian_vcpu_context {
+ uint64_t apic_assist;
+};
+
+DECLARE_HVM_SAVE_TYPE(VIRIDIAN_VCPU, 17, struct hvm_viridian_vcpu_context);
+
+struct hvm_vmce_vcpu {
+ uint64_t caps;
+ uint64_t mci_ctl2_bank0;
+ uint64_t mci_ctl2_bank1;
+};
+
+DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu);
+
+struct hvm_tsc_adjust {
+ uint64_t tsc_adjust;
+};
+
+DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust);
+
+/*
+ * Largest type-code in use
+ */
+#define HVM_SAVE_CODE_MAX 19
+
+#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/xen-mca.h
@@ -1,0 +1,440 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ *
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <[email protected]>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ * (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ * (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ * This is better done as separate task, physical CPU hotplugging,
+ * and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ * move a DomU (or Dom0 itself) away from a malicious page
+ * producing correctable errors.
+ * 9. offlining physical page:
+ * Xen free's and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ * and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; our interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */
+#define XEN_MC_NONURGENT 0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */
+#define XEN_MC_URGENT 0x0002
+/* IN: Dom0 acknowledges previously-fetched telemetry */
+#define XEN_MC_ACK 0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK 0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED 0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA 0x2
+/* OUT: Between notification time and this hypercall another
+ * (most likely correctable) error happened. The fetched data
+ * does not match the original machine check data. */
+#define XEN_MC_NOMATCH 0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecture:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ * Dom0 and DomU: Uncorrectable errors are reported via NMI handlers
+ */
+#define MC_TYPE_GLOBAL 0
+#define MC_TYPE_BANK 1
+#define MC_TYPE_EXTENDED 2
+#define MC_TYPE_RECOVERY 3
+
+struct mcinfo_common {
+ uint16_t type; /* structure type */
+ uint16_t size; /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE (1 << 0)
+#define MC_FLAG_UNCORRECTABLE (1 << 1)
+#define MC_FLAG_RECOVERABLE (1 << 2)
+#define MC_FLAG_POLLED (1 << 3)
+#define MC_FLAG_RESET (1 << 4)
+#define MC_FLAG_CMCI (1 << 5)
+#define MC_FLAG_MCE (1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+ struct mcinfo_common common;
+
+ /* domain running at the time of the error (most likely the impacted one) */
+ uint16_t mc_domid;
+ uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+ uint32_t mc_socketid; /* physical socket of the physical core */
+ uint16_t mc_coreid; /* physical impacted core */
+ uint16_t mc_core_threadid; /* core thread of physical core */
+ uint32_t mc_apicid;
+ uint32_t mc_flags;
+ uint64_t mc_gstatus; /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+ struct mcinfo_common common;
+
+ uint16_t mc_bank; /* bank nr */
+ uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr, on Dom0
+ * and only if mc_addr is valid. Never valid on DomU. */
+ uint64_t mc_status; /* bank status */
+ uint64_t mc_addr; /* bank address, only valid
+ * if addr bit is set in mc_status */
+ uint64_t mc_misc;
+ uint64_t mc_ctrl2;
+ uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+ uint64_t reg; /* MSR */
+ uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */
+struct mcinfo_extended {
+ struct mcinfo_common common;
+
+ /* You can fill up to five registers.
+ * If you need more, then use this structure
+ * multiple times. */
+
+ uint32_t mc_msrs; /* Number of MSRs with valid values. */
+ /*
+ * Currently Intel extended MSRs (32/64-bit) include all GP registers
+ * and E(R)FLAGS, E(R)IP, E(R)MISC; up to 11/19 of them might be
+ * useful at present. So expand this array to 16/32 entries to leave room.
+ */
+ struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen took a successful recovery action; the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some cases */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different recovery action types. If the action is performed successfully,
+ * the REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* The interface below is used between XEN/DOM0 for passing XEN's recovery
+ * action information to DOM0.
+ * Usage scenario: after offlining a broken page, XEN might pass its page
+ * offline recovery action result to DOM0. DOM0 will save the information in
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easily-broken page earlier at the next reboot.
+ */
+struct page_offline_action
+{
+ /* Params for passing the offlined page number to DOM0 */
+ uint64_t mfn;
+ uint64_t status;
+};
+
+struct cpu_offline_action
+{
+ /* Params for passing the identity of the offlined CPU to DOM0 */
+ uint32_t mc_socketid;
+ uint16_t mc_coreid;
+ uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mcinfo_recovery
+{
+ struct mcinfo_common common;
+ uint16_t mc_bank; /* bank nr */
+ uint8_t action_flags;
+ uint8_t action_types;
+ union {
+ struct page_offline_action page_retire;
+ struct cpu_offline_action cpu_offline;
+ uint8_t pad[MAX_UNION_SIZE];
+ } action_info;
+};
+
+
+#define MCINFO_HYPERCALLSIZE 1024
+#define MCINFO_MAXSIZE 768
+
+#define MCINFO_FLAGS_UNCOMPLETE 0x1
+struct mc_info {
+ /* Number of mcinfo_* entries in mi_data */
+ uint32_t mi_nentries;
+ uint32_t flags;
+ uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
+};
+typedef struct mc_info mc_info_t;
+DEFINE_XEN_GUEST_HANDLE(mc_info_t);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS 7 /* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX 3 /* Linux-defined */
+#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */
+
+struct mcinfo_logical_cpu {
+ uint32_t mc_cpunr;
+ uint32_t mc_chipid;
+ uint16_t mc_coreid;
+ uint16_t mc_threadid;
+ uint32_t mc_apicid;
+ uint32_t mc_clusterid;
+ uint32_t mc_ncores;
+ uint32_t mc_ncores_active;
+ uint32_t mc_nthreads;
+ int32_t mc_cpuid_level;
+ uint32_t mc_family;
+ uint32_t mc_vendor;
+ uint32_t mc_model;
+ uint32_t mc_step;
+ char mc_vendorid[16];
+ char mc_brandid[64];
+ uint32_t mc_cpu_caps[MC_NCAPS];
+ uint32_t mc_cache_size;
+ uint32_t mc_cache_alignment;
+ int32_t mc_nmsrvals;
+ struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+};
+typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t);
+
+
+/*
+ * OSes should use these instead of writing their own lookup functions,
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ * uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi) \
+ (_mi)->mi_nentries
+/* Prototype:
+ * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi) \
+ ((struct mcinfo_common *)(_mi)->mi_data)
+/* Prototype:
+ * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic) \
+ ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype:
+ * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+#define x86_mcinfo_lookup(_ret, _mi, _type) \
+ do { \
+ uint32_t found, i; \
+ struct mcinfo_common *_mic; \
+ \
+ found = 0; \
+ (_ret) = NULL; \
+ if (_mi == NULL) break; \
+ _mic = x86_mcinfo_first(_mi); \
+ for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \
+ if (_mic->type == (_type)) { \
+ found = 1; \
+ break; \
+ } \
+ _mic = x86_mcinfo_next(_mic); \
+ } \
+ (_ret) = found ? _mic : NULL; \
+ } while (0)
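+
+/* Usage sketch (editor's addition, not part of the imported header):
+ * walking fetched telemetry with the accessors above.  Assumes `mi` was
+ * already filled in by a XEN_MC_fetch hypercall; the handler name is
+ * hypothetical.  Bit 63 of mc_status is the architectural MCi_STATUS
+ * valid bit.
+ *
+ *     struct mcinfo_bank *bank;
+ *
+ *     x86_mcinfo_lookup(bank, mi, MC_TYPE_BANK);
+ *     if (bank != NULL && (bank->mc_status & (1ULL << 63)))
+ *         handle_bank_error(bank->mc_bank, bank->mc_status, bank->mc_addr);
+ */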
+
+
+/* Usecase 1
+ * Register machine check trap callback handler
+ * (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note: this hypercall is special, because both Dom0 and DomU must use it.
+ */
+#define XEN_MC_fetch 1
+struct xen_mc_fetch {
+ /* IN/OUT variables. */
+ uint32_t flags; /* IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+ XEN_MC_ACK if ack'ing an earlier fetch */
+ /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED,
+ XEN_MC_NODATA, XEN_MC_NOMATCH */
+ uint32_t _pad0;
+ uint64_t fetch_id; /* OUT: id for ack, IN: id we are ack'ing */
+
+ /* OUT variables. */
+ XEN_GUEST_HANDLE(mc_info_t) data;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t);
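+
+/* Usage sketch (editor's addition): fetching urgent telemetry.  The
+ * HYPERVISOR_mca wrapper and the static buffer are assumptions; success
+ * is indicated by the absence of the XEN_MC_* error bits in flags.
+ *
+ *     static struct mc_info buf;
+ *     struct xen_mc mc;
+ *
+ *     memset(&mc, 0, sizeof mc);
+ *     mc.cmd = XEN_MC_fetch;
+ *     mc.interface_version = XEN_MCA_INTERFACE_VERSION;
+ *     mc.u.mc_fetch.flags = XEN_MC_URGENT;
+ *     set_xen_guest_handle(mc.u.mc_fetch.data, &buf);
+ *     if (HYPERVISOR_mca(&mc) == 0 &&
+ *         !(mc.u.mc_fetch.flags & (XEN_MC_FETCHFAILED|XEN_MC_NODATA)))
+ *         ...   // buf now holds an mc_info record
+ */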
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain 2
+struct xen_mc_notifydomain {
+ /* IN variables. */
+ uint16_t mc_domid; /* The unprivileged domain to notify. */
+ uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify.
+ * Usually echo'd value from the fetch hypercall. */
+
+ /* IN/OUT variables. */
+ uint32_t flags;
+
+/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
+
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+ /* IN/OUT */
+ uint32_t ncpus;
+ uint32_t _pad0;
+ /* OUT */
+ XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;
+};
+
+#define XEN_MC_msrinject 4
+#define MC_MSRINJ_MAXMSRS 8
+struct xen_mc_msrinject {
+ /* IN */
+ uint32_t mcinj_cpunr; /* target processor id */
+ uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */
+ uint32_t mcinj_count; /* 0 .. count-1 in array are valid */
+ uint32_t _pad0;
+ struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE 0x1
+
+#define XEN_MC_mceinject 5
+struct xen_mc_mceinject {
+ unsigned int mceinj_cpunr; /* target processor id */
+};
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#define XEN_MC_inject_v2 6
+#define XEN_MC_INJECT_TYPE_MASK 0x7
+#define XEN_MC_INJECT_TYPE_MCE 0x0
+#define XEN_MC_INJECT_TYPE_CMCI 0x1
+
+#define XEN_MC_INJECT_CPU_BROADCAST 0x8
+
+struct xen_mc_inject_v2 {
+ uint32_t flags;
+ struct xenctl_bitmap cpumap;
+};
+#endif
+
+struct xen_mc {
+ uint32_t cmd;
+ uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+ union {
+ struct xen_mc_fetch mc_fetch;
+ struct xen_mc_notifydomain mc_notifydomain;
+ struct xen_mc_physcpuinfo mc_physcpuinfo;
+ struct xen_mc_msrinject mc_msrinject;
+ struct xen_mc_mceinject mc_mceinject;
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+ struct xen_mc_inject_v2 mc_inject_v2;
+#endif
+ } u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_t);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/xen-x86_32.h
@@ -1,0 +1,171 @@
+/******************************************************************************
+ * xen-x86_32.h
+ *
+ * Guest OS interface to x86 32-bit Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2007, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
+
+/*
+ * Hypercall interface:
+ * Input: %ebx, %ecx, %edx, %esi, %edi, %ebp (arguments 1-6)
+ * Output: %eax
+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
+ * call hypercall_page + hypercall-number * 32
+ * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx)
+ */
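+
+/* Illustrative sketch (editor's addition, not part of the imported
+ * header): a two-argument hypercall through the hypercall page, written
+ * in GCC-style inline assembly.  This is an assumption for illustration
+ * only -- the Plan 9 kernel in this import uses its own assembler stubs;
+ * the sketch merely restates the convention described above.
+ *
+ *     extern char hypercall_page[];
+ *
+ *     static inline long
+ *     hypercall2(unsigned op, unsigned long arg1, unsigned long arg2)
+ *     {
+ *         long ret;
+ *         asm volatile("call *%[entry]"
+ *             : "=a" (ret), "+b" (arg1), "+c" (arg2)  // result in %eax;
+ *             : [entry] "r" (hypercall_page + op * 32) // args clobbered
+ *             : "memory");
+ *         return ret;
+ *     }
+ */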
+
+/*
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+#define FLAT_RING1_CS 0xe019 /* GDT index 7171 */
+#define FLAT_RING1_DS 0xe021 /* GDT index 7172 */
+#define FLAT_RING1_SS 0xe021 /* GDT index 7172 */
+#define FLAT_RING3_CS 0xe02b /* GDT index 7173 */
+#define FLAT_RING3_DS 0xe033 /* GDT index 7174 */
+#define FLAT_RING3_SS 0xe033 /* GDT index 7174 */
+
+#define FLAT_KERNEL_CS FLAT_RING1_CS
+#define FLAT_KERNEL_DS FLAT_RING1_DS
+#define FLAT_KERNEL_SS FLAT_RING1_SS
+#define FLAT_USER_CS FLAT_RING3_CS
+#define FLAT_USER_DS FLAT_RING3_DS
+#define FLAT_USER_SS FLAT_RING3_SS
+
+#define __HYPERVISOR_VIRT_START_PAE 0xF5800000
+#define __MACH2PHYS_VIRT_START_PAE 0xF5800000
+#define __MACH2PHYS_VIRT_END_PAE 0xF6800000
+#define HYPERVISOR_VIRT_START_PAE \
+ mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_START_PAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_END_PAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE)
+
+/* Non-PAE bounds are obsolete. */
+#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000
+#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000
+#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000
+#define HYPERVISOR_VIRT_START_NONPAE \
+ mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_START_NONPAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_END_NONPAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE)
+
+#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#endif
+
+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
+#endif
+
+/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#undef ___DEFINE_XEN_GUEST_HANDLE
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+ typedef struct { type *p; } \
+ __guest_handle_ ## name; \
+ typedef struct { union { type *p; uint64_aligned_t q; }; } \
+ __guest_handle_64_ ## name
+#undef set_xen_guest_handle_raw
+#define set_xen_guest_handle_raw(hnd, val) \
+ do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \
+ (hnd).p = val; \
+ } while ( 0 )
+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
+#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name
+#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name)
+#endif
+
+#ifndef __ASSEMBLY__
+
+struct cpu_user_regs {
+ uint32_t ebx;
+ uint32_t ecx;
+ uint32_t edx;
+ uint32_t esi;
+ uint32_t edi;
+ uint32_t ebp;
+ uint32_t eax;
+ uint16_t error_code; /* private */
+ uint16_t entry_vector; /* private */
+ uint32_t eip;
+ uint16_t cs;
+ uint8_t saved_upcall_mask;
+ uint8_t _pad0;
+ uint32_t eflags; /* eflags.IF == !saved_upcall_mask */
+ uint32_t esp;
+ uint16_t ss, _pad1;
+ uint16_t es, _pad2;
+ uint16_t ds, _pad3;
+ uint16_t fs, _pad4;
+ uint16_t gs, _pad5;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
+
+/*
+ * Page-directory addresses above 4GB do not fit into architectural %cr3.
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
+ * must use the following accessor macros to pack/unpack valid MFNs.
+ */
+#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
+#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
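+
+/* Worked example (editor's addition): MFN 0x180000 names a frame above
+ * the 4GB boundary (0x180000 << 12 == 0x180000000), so it cannot be
+ * stored in %cr3 directly:
+ *     xen_pfn_to_cr3(0x180000)   == 0x80000001   (high bits rotated low)
+ *     xen_cr3_to_pfn(0x80000001) == 0x180000     (round trip recovers it)
+ */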
+
+struct arch_vcpu_info {
+ unsigned long cr2;
+ unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+struct xen_callback {
+ unsigned long cs;
+ unsigned long eip;
+};
+typedef struct xen_callback xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/xen-x86_64.h
@@ -1,0 +1,202 @@
+/******************************************************************************
+ * xen-x86_64.h
+ *
+ * Guest OS interface to x86 64-bit Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
+
+/*
+ * Hypercall interface:
+ * Input: %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6)
+ * Output: %rax
+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
+ * call hypercall_page + hypercall-number * 32
+ * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi)
+ */
+
+/*
+ * 64-bit segment selectors
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+
+#define FLAT_RING3_CS32 0xe023 /* GDT index 7172 */
+#define FLAT_RING3_CS64 0xe033 /* GDT index 7174 */
+#define FLAT_RING3_DS32 0xe02b /* GDT index 7173 */
+#define FLAT_RING3_DS64 0x0000 /* NULL selector */
+#define FLAT_RING3_SS32 0xe02b /* GDT index 7173 */
+#define FLAT_RING3_SS64 0xe02b /* GDT index 7173 */
+
+#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
+#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
+#define FLAT_KERNEL_DS FLAT_KERNEL_DS64
+#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
+#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
+#define FLAT_KERNEL_CS FLAT_KERNEL_CS64
+#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
+#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
+#define FLAT_KERNEL_SS FLAT_KERNEL_SS64
+
+#define FLAT_USER_DS64 FLAT_RING3_DS64
+#define FLAT_USER_DS32 FLAT_RING3_DS32
+#define FLAT_USER_DS FLAT_USER_DS64
+#define FLAT_USER_CS64 FLAT_RING3_CS64
+#define FLAT_USER_CS32 FLAT_RING3_CS32
+#define FLAT_USER_CS FLAT_USER_CS64
+#define FLAT_USER_SS64 FLAT_RING3_SS64
+#define FLAT_USER_SS32 FLAT_RING3_SS32
+#define FLAT_USER_SS FLAT_USER_SS64
+
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END 0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START 0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END 0xFFFF804000000000
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
+#endif
+
+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+/*
+ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
+ * @which == SEGBASE_* ; @base == 64-bit base address
+ * Returns 0 on success.
+ */
+#define SEGBASE_FS 0
+#define SEGBASE_GS_USER 1
+#define SEGBASE_GS_KERNEL 2
+#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
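+
+/* Example (editor's addition; the per-cpu data pointer is hypothetical):
+ *     HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, (unsigned long)percpu);
+ */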
+
+/*
+ * int HYPERVISOR_iret(void)
+ * All arguments are on the kernel stack, in the following format.
+ * Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ * RING0 -> RING3 kernel mode.
+ * RING1 -> RING3 kernel mode.
+ * RING2 -> RING3 kernel mode.
+ * RING3 -> RING3 user mode.
+ * However RING0 indicates that the guest kernel should return to itself
+ * directly with
+ * orb $3,1*8(%rsp)
+ * iretq
+ * If flags contains VGCF_in_syscall:
+ * Restore RAX, RIP, RFLAGS, RSP.
+ * Discard R11, RCX, CS, SS.
+ * Otherwise:
+ * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
+ * All other registers are saved on hypercall entry and restored to user.
+ */
+/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
+#define _VGCF_in_syscall 8
+#define VGCF_in_syscall (1<<_VGCF_in_syscall)
+#define VGCF_IN_SYSCALL VGCF_in_syscall
+
+#ifndef __ASSEMBLY__
+
+struct iret_context {
+ /* Top of stack (%rsp at point of hypercall). */
+ uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+ /* Bottom of iret stack frame. */
+};
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
+#define __DECL_REG(name) union { \
+ uint64_t r ## name, e ## name; \
+ uint32_t _e ## name; \
+}
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
+#define __DECL_REG(name) uint64_t r ## name
+#endif
+
+struct cpu_user_regs {
+ uint64_t r15;
+ uint64_t r14;
+ uint64_t r13;
+ uint64_t r12;
+ __DECL_REG(bp);
+ __DECL_REG(bx);
+ uint64_t r11;
+ uint64_t r10;
+ uint64_t r9;
+ uint64_t r8;
+ __DECL_REG(ax);
+ __DECL_REG(cx);
+ __DECL_REG(dx);
+ __DECL_REG(si);
+ __DECL_REG(di);
+ uint32_t error_code; /* private */
+ uint32_t entry_vector; /* private */
+ __DECL_REG(ip);
+ uint16_t cs, _pad0[1];
+ uint8_t saved_upcall_mask;
+ uint8_t _pad1[3];
+ __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */
+ __DECL_REG(sp);
+ uint16_t ss, _pad2[3];
+ uint16_t es, _pad3[3];
+ uint16_t ds, _pad4[3];
+ uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */
+ uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
+
+#undef __DECL_REG
+
+#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
+#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
+
+struct arch_vcpu_info {
+ unsigned long cr2;
+ unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+typedef unsigned long xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/xen.h
@@ -1,0 +1,260 @@
+/******************************************************************************
+ * arch-x86/xen.h
+ *
+ * Guest OS interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "../xen.h"
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_H__
+
+/* Structural guest handles introduced in 0x00030201. */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+ typedef struct { type *p; } __guest_handle_ ## name
+#else
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+ typedef type * __guest_handle_ ## name
+#endif
+
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
+ * in a struct in memory.
+ * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
+ * hypercall argument.
+ * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
+ * they might not be on other architectures.
+ */
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+ ___DEFINE_XEN_GUEST_HANDLE(name, type); \
+ ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name
+#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name)
+#define XEN_GUEST_HANDLE_PARAM(name) XEN_GUEST_HANDLE(name)
+#define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0)
+#ifdef __XEN_TOOLS__
+#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
+#endif
+#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
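+
+/* Usage sketch (editor's addition): declaring and setting a handle.
+ * foo_t is a hypothetical guest type.
+ *
+ *     typedef struct { int x; } foo_t;
+ *     DEFINE_XEN_GUEST_HANDLE(foo_t);
+ *
+ *     foo_t f;
+ *     XEN_GUEST_HANDLE(foo_t) h;
+ *     set_xen_guest_handle(h, &f);   // stores &f in h.p (>= 0x00030201 ABI)
+ */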
+
+#if defined(__i386__)
+#include "xen-x86_32.h"
+#elif defined(__x86_64__)
+#include "xen-x86_64.h"
+#endif
+
+#ifndef __ASSEMBLY__
+typedef unsigned long xen_pfn_t;
+#define PRI_xen_pfn "lx"
+#endif
+
+/*
+ * `incontents 200 segdesc Segment Descriptor Tables
+ */
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries);
+ * `
+ */
+/*
+ * A number of GDT entries are reserved by Xen. These are not situated at the
+ * start of the GDT because some stupid OSes export hard-coded selector values
+ * in their ABI. These hard-coded values are always near the start of the GDT,
+ * so Xen places itself out of the way, at the far end of the GDT.
+ *
+ * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
+ */
+#define FIRST_RESERVED_GDT_PAGE 14
+#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096)
+#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
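+
+/* Worked values (editor's addition): 14 * 4096 == 57344 bytes, so the
+ * reserved region begins at GDT entry 57344 / 8 == 7168; the FLAT_*
+ * selectors in xen-x86_32.h/xen-x86_64.h (e.g. 0xe019 >> 3 == 7171)
+ * fall inside it.
+ */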
+
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc);
+ * `
+ * ` @pa The machine physical address of the descriptor to
+ * ` update. Must be either a descriptor page or writable.
+ * ` @desc The descriptor value to update, in the same format as a
+ * ` native descriptor table entry.
+ */
+
+/* Maximum number of virtual CPUs in legacy multi-processor guests. */
+#define XEN_LEGACY_MAX_VCPUS 32
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long xen_ulong_t;
+#define PRI_xen_ulong "lx"
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp);
+ * `
+ * Sets the stack segment and pointer for the current vcpu.
+ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]);
+ * `
+ */
+/*
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * Terminate the array with a sentinel entry, with traps[].address==0.
+ * The privilege level specifies which modes may enter a trap via a software
+ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
+ * privilege levels as follows:
+ *   Level == 0: No one may enter
+ * Level == 1: Kernel may enter
+ * Level == 2: Kernel may enter
+ * Level == 3: Everyone may enter
+ */
+#define TI_GET_DPL(_ti) ((_ti)->flags & 3)
+#define TI_GET_IF(_ti) ((_ti)->flags & 4)
+#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
+#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2))
+struct trap_info {
+ uint8_t vector; /* exception vector */
+ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */
+ uint16_t cs; /* code selector */
+ unsigned long address; /* code offset */
+};
+typedef struct trap_info trap_info_t;
+DEFINE_XEN_GUEST_HANDLE(trap_info_t);
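+
+/* Usage sketch (editor's addition): a minimal trap table.  The handler
+ * names are hypothetical and the HYPERVISOR_set_trap_table wrapper is
+ * assumed; note the sentinel entry with address == 0 ending the array,
+ * as required above.
+ *
+ *     extern void divzero_handler(void), gpf_handler(void);
+ *
+ *     static struct trap_info traps[] = {
+ *         {  0, 0, FLAT_KERNEL_CS, (unsigned long)divzero_handler },
+ *         { 13, 0, FLAT_KERNEL_CS, (unsigned long)gpf_handler },
+ *         {  0, 0, 0, 0 },
+ *     };
+ *     HYPERVISOR_set_trap_table(traps);
+ */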
+
+typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+/*
+ * The following is all CPU context. Note that the fpu_ctxt block is filled
+ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ */
+struct vcpu_guest_context {
+ /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
+ struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */
+#define VGCF_I387_VALID (1<<0)
+#define VGCF_IN_KERNEL (1<<2)
+#define _VGCF_i387_valid 0
+#define VGCF_i387_valid (1<<_VGCF_i387_valid)
+#define _VGCF_in_kernel 2
+#define VGCF_in_kernel (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events 4
+#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events)
+#define _VGCF_online 5
+#define VGCF_online (1<<_VGCF_online)
+ unsigned long flags; /* VGCF_* flags */
+ struct cpu_user_regs user_regs; /* User-level CPU registers */
+ struct trap_info trap_ctxt[256]; /* Virtual IDT */
+ unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
+ unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+ unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
+ /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
+ unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
+ unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
+#ifdef __i386__
+ unsigned long event_callback_cs; /* CS:EIP of event callback */
+ unsigned long event_callback_eip;
+ unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
+ unsigned long failsafe_callback_eip;
+#else
+ unsigned long event_callback_eip;
+ unsigned long failsafe_callback_eip;
+#ifdef __XEN__
+ union {
+ unsigned long syscall_callback_eip;
+ struct {
+ unsigned int event_callback_cs; /* compat CS of event cb */
+ unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */
+ };
+ };
+#else
+ unsigned long syscall_callback_eip;
+#endif
+#endif
+ unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
+#ifdef __x86_64__
+ /* Segment base addresses. */
+ uint64_t fs_base;
+ uint64_t gs_base_kernel;
+ uint64_t gs_base_user;
+#endif
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
+
+struct arch_shared_info {
+ unsigned long max_pfn; /* max pfn that appears in table */
+ /* Frame containing list of mfns containing list of mfns containing p2m. */
+ xen_pfn_t pfn_to_mfn_frame_list_list;
+ unsigned long nmi_reason;
+ uint64_t pad[32];
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_fpu_taskswitch(int set);
+ * `
+ * Sets (if set!=0) or clears (if set==0) CR0.TS.
+ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_debugreg(int regno, unsigned long value);
+ *
+ * ` unsigned long
+ * ` HYPERVISOR_get_debugreg(int regno);
+ * For 0<=reg<=7, returns the debug register value.
+ * For other values of reg, returns ((unsigned long)-EINVAL).
+ * (Unfortunately, this interface is defective.)
+ */
+
+/*
+ * Prefix forces emulation of some non-trapping instructions.
+ * Currently only CPUID.
+ */
+#ifdef __ASSEMBLY__
+#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
+#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
+#else
+#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
+#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
+#endif
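+
+/* Usage sketch (editor's addition): forcing an emulated CPUID from C,
+ * using GCC-style inline assembly (an assumption; this kernel's own
+ * assembler syntax differs).  Leaf 0x40000000 is the hypervisor base leaf.
+ *
+ *     uint32_t eax = 0x40000000, ebx, ecx, edx;
+ *     asm volatile(XEN_CPUID
+ *         : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));
+ */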
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86_32.h
@@ -1,0 +1,27 @@
+/******************************************************************************
+ * arch-x86_32.h
+ *
+ * Guest OS interface to x86 32-bit Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "arch-x86/xen.h"
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86_64.h
@@ -1,0 +1,43 @@
+/******************************************************************************
+ * arch-x86_64.h
+ *
+ * Guest OS interface to x86 64-bit Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "arch-x86/xen.h"
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_callbacks(unsigned long event_selector,
+ * ` unsigned long event_address,
+ * ` unsigned long failsafe_selector,
+ * ` unsigned long failsafe_address);
+ * `
+ * Register for callbacks on events. When an event (from an event
+ * channel) occurs, event_address is used as the value of eip.
+ *
+ * A similar callback occurs if the segment selectors are invalid.
+ * failsafe_address is used as the value of eip.
+ *
+ * On x86_64, event_selector and failsafe_selector are ignored (???).
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/callback.h
@@ -1,0 +1,121 @@
+/******************************************************************************
+ * callback.h
+ *
+ * Register guest OS callbacks with Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Ian Campbell
+ */
+
+#ifndef __XEN_PUBLIC_CALLBACK_H__
+#define __XEN_PUBLIC_CALLBACK_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ * long callback_op(int cmd, void *extra_args)
+ * @cmd == CALLBACKOP_??? (callback operation).
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/* x86: Callback for event delivery. */
+#define CALLBACKTYPE_event 0
+
+/* x86: Failsafe callback when guest state cannot be restored by Xen. */
+#define CALLBACKTYPE_failsafe 1
+
+/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */
+#define CALLBACKTYPE_syscall 2
+
+/*
+ * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel
+ * feature is enabled. Do not use this callback type in new code.
+ */
+#define CALLBACKTYPE_sysenter_deprecated 3
+
+/* x86: Callback for NMI delivery. */
+#define CALLBACKTYPE_nmi 4
+
+/*
+ * x86: sysenter is only available as follows:
+ * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled
+ * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs
+ * ('32-on-32-on-64', '32-on-64-on-64')
+ * [nb. also 64-bit guest applications on Intel CPUs
+ * ('64-on-64-on-64'), but syscall is preferred]
+ */
+#define CALLBACKTYPE_sysenter 5
+
+/*
+ * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs
+ * ('32-on-32-on-64', '32-on-64-on-64')
+ */
+#define CALLBACKTYPE_syscall32 7
+
+/*
+ * Disable event delivery during callback? This flag is ignored for event and
+ * NMI callbacks: event delivery is unconditionally disabled.
+ */
+#define _CALLBACKF_mask_events 0
+#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events)
+
+/*
+ * Register a callback.
+ */
+#define CALLBACKOP_register 0
+struct callback_register {
+ uint16_t type;
+ uint16_t flags;
+ xen_callback_t address;
+};
+typedef struct callback_register callback_register_t;
+DEFINE_XEN_GUEST_HANDLE(callback_register_t);
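+
+/* Usage sketch (editor's addition): registering the event callback on
+ * x86-32, where xen_callback_t is a cs:eip pair.  The handler name is
+ * hypothetical and the HYPERVISOR_callback_op wrapper is assumed to
+ * follow the prototype at the top of this file.
+ *
+ *     extern void hypervisor_callback(void);
+ *
+ *     struct callback_register cb = {
+ *         .type = CALLBACKTYPE_event,
+ *         .flags = 0,
+ *         .address = { FLAT_KERNEL_CS, (unsigned long)hypervisor_callback },
+ *     };
+ *     HYPERVISOR_callback_op(CALLBACKOP_register, &cb);
+ */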
+
+/*
+ * Unregister a callback.
+ *
+ * Not all callbacks can be unregistered. -EINVAL will be returned if
+ * you attempt to unregister such a callback.
+ */
+#define CALLBACKOP_unregister 1
+struct callback_unregister {
+ uint16_t type;
+ uint16_t _unused;
+};
+typedef struct callback_unregister callback_unregister_t;
+DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030207
+#undef CALLBACKTYPE_sysenter
+#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated
+#endif
+
+#endif /* __XEN_PUBLIC_CALLBACK_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/dom0_ops.h
@@ -1,0 +1,120 @@
+/******************************************************************************
+ * dom0_ops.h
+ *
+ * Process command requests from domain-0 guest OS.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2003, B Dragovic
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_DOM0_OPS_H__
+#define __XEN_PUBLIC_DOM0_OPS_H__
+
+#include "xen.h"
+#include "platform.h"
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030204
+#error "dom0_ops.h is a compatibility interface only"
+#endif
+
+#define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION
+
+#define DOM0_SETTIME XENPF_settime
+#define dom0_settime xenpf_settime
+#define dom0_settime_t xenpf_settime_t
+
+#define DOM0_ADD_MEMTYPE XENPF_add_memtype
+#define dom0_add_memtype xenpf_add_memtype
+#define dom0_add_memtype_t xenpf_add_memtype_t
+
+#define DOM0_DEL_MEMTYPE XENPF_del_memtype
+#define dom0_del_memtype xenpf_del_memtype
+#define dom0_del_memtype_t xenpf_del_memtype_t
+
+#define DOM0_READ_MEMTYPE XENPF_read_memtype
+#define dom0_read_memtype xenpf_read_memtype
+#define dom0_read_memtype_t xenpf_read_memtype_t
+
+#define DOM0_MICROCODE XENPF_microcode_update
+#define dom0_microcode xenpf_microcode_update
+#define dom0_microcode_t xenpf_microcode_update_t
+
+#define DOM0_PLATFORM_QUIRK XENPF_platform_quirk
+#define dom0_platform_quirk xenpf_platform_quirk
+#define dom0_platform_quirk_t xenpf_platform_quirk_t
+
+typedef uint64_t cpumap_t;
+
+/* Unsupported legacy operation -- defined for API compatibility. */
+#define DOM0_MSR 15
+struct dom0_msr {
+ /* IN variables. */
+ uint32_t write;
+ cpumap_t cpu_mask;
+ uint32_t msr;
+ uint32_t in1;
+ uint32_t in2;
+ /* OUT variables. */
+ uint32_t out1;
+ uint32_t out2;
+};
+typedef struct dom0_msr dom0_msr_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
+
+/* Unsupported legacy operation -- defined for API compatibility. */
+#define DOM0_PHYSICAL_MEMORY_MAP 40
+struct dom0_memory_map_entry {
+ uint64_t start, end;
+ uint32_t flags; /* reserved */
+ uint8_t is_ram;
+};
+typedef struct dom0_memory_map_entry dom0_memory_map_entry_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t);
+
+struct dom0_op {
+ uint32_t cmd;
+ uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
+ union {
+ struct dom0_msr msr;
+ struct dom0_settime settime;
+ struct dom0_add_memtype add_memtype;
+ struct dom0_del_memtype del_memtype;
+ struct dom0_read_memtype read_memtype;
+ struct dom0_microcode microcode;
+ struct dom0_platform_quirk platform_quirk;
+ struct dom0_memory_map_entry physical_memory_map;
+ uint8_t pad[128];
+ } u;
+};
+typedef struct dom0_op dom0_op_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_op_t);
+
+#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/domctl.h
@@ -1,0 +1,998 @@
+/******************************************************************************
+ * domctl.h
+ *
+ * Domain management operations. For use by node control stack.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2003, B Dragovic
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_DOMCTL_H__
+#define __XEN_PUBLIC_DOMCTL_H__
+
+#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
+#error "domctl operations are intended for use by node control tools only"
+#endif
+
+#include "xen.h"
+#include "grant_table.h"
+#include "hvm/save.h"
+
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
+
+/*
+ * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
+ * If it is specified as zero, an id is auto-allocated and returned.
+ */
+/* XEN_DOMCTL_createdomain */
+struct xen_domctl_createdomain {
+ /* IN parameters */
+ uint32_t ssidref;
+ xen_domain_handle_t handle;
+ /* Is this an HVM guest (as opposed to a PV guest)? */
+#define _XEN_DOMCTL_CDF_hvm_guest 0
+#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest)
+ /* Use hardware-assisted paging if available? */
+#define _XEN_DOMCTL_CDF_hap 1
+#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap)
+ /* Should domain memory integrity be verified by tboot during Sx? */
+#define _XEN_DOMCTL_CDF_s3_integrity 2
+#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity)
+ /* Disable out-of-sync shadow page tables? */
+#define _XEN_DOMCTL_CDF_oos_off 3
+#define XEN_DOMCTL_CDF_oos_off (1U<<_XEN_DOMCTL_CDF_oos_off)
+ uint32_t flags;
+};
+typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
+
+/* XEN_DOMCTL_getdomaininfo */
+struct xen_domctl_getdomaininfo {
+ /* OUT variables. */
+ domid_t domain; /* Also echoed in domctl.domain */
+ /* Domain is scheduled to die. */
+#define _XEN_DOMINF_dying 0
+#define XEN_DOMINF_dying (1U<<_XEN_DOMINF_dying)
+ /* Domain is an HVM guest (as opposed to a PV guest). */
+#define _XEN_DOMINF_hvm_guest 1
+#define XEN_DOMINF_hvm_guest (1U<<_XEN_DOMINF_hvm_guest)
+ /* The guest OS has shut down. */
+#define _XEN_DOMINF_shutdown 2
+#define XEN_DOMINF_shutdown (1U<<_XEN_DOMINF_shutdown)
+ /* Currently paused by control software. */
+#define _XEN_DOMINF_paused 3
+#define XEN_DOMINF_paused (1U<<_XEN_DOMINF_paused)
+ /* Currently blocked pending an event. */
+#define _XEN_DOMINF_blocked 4
+#define XEN_DOMINF_blocked (1U<<_XEN_DOMINF_blocked)
+ /* Domain is currently running. */
+#define _XEN_DOMINF_running 5
+#define XEN_DOMINF_running (1U<<_XEN_DOMINF_running)
+ /* Being debugged. */
+#define _XEN_DOMINF_debugged 6
+#define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged)
+ /* XEN_DOMINF_shutdown guest-supplied code. */
+#define XEN_DOMINF_shutdownmask 255
+#define XEN_DOMINF_shutdownshift 16
+ uint32_t flags; /* XEN_DOMINF_* */
+ uint64_aligned_t tot_pages;
+ uint64_aligned_t max_pages;
+ uint64_aligned_t outstanding_pages;
+ uint64_aligned_t shr_pages;
+ uint64_aligned_t paged_pages;
+ uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */
+ uint64_aligned_t cpu_time;
+ uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */
+ uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */
+ uint32_t ssidref;
+ xen_domain_handle_t handle;
+ uint32_t cpupool;
+};
+typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
+
+
+/* XEN_DOMCTL_getmemlist */
+struct xen_domctl_getmemlist {
+ /* IN variables. */
+ /* Max entries to write to output buffer. */
+ uint64_aligned_t max_pfns;
+ /* Start index in guest's page list. */
+ uint64_aligned_t start_pfn;
+ XEN_GUEST_HANDLE_64(uint64) buffer;
+ /* OUT variables. */
+ uint64_aligned_t num_pfns;
+};
+typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t);
+
+
+/* XEN_DOMCTL_getpageframeinfo */
+
+#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28
+#define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28)
+#define XEN_DOMCTL_PFINFO_L1TAB (0x1U<<28)
+#define XEN_DOMCTL_PFINFO_L2TAB (0x2U<<28)
+#define XEN_DOMCTL_PFINFO_L3TAB (0x3U<<28)
+#define XEN_DOMCTL_PFINFO_L4TAB (0x4U<<28)
+#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28)
+#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
+#define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */
+#define XEN_DOMCTL_PFINFO_XALLOC (0xeU<<28) /* allocate-only page */
+#define XEN_DOMCTL_PFINFO_BROKEN (0xdU<<28) /* broken page */
+#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
+
+struct xen_domctl_getpageframeinfo {
+ /* IN variables. */
+ uint64_aligned_t gmfn; /* GMFN to query */
+ /* OUT variables. */
+ /* Is the page PINNED to a type? */
+ uint32_t type; /* see above type defs */
+};
+typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t);
+
+
+/* XEN_DOMCTL_getpageframeinfo2 */
+struct xen_domctl_getpageframeinfo2 {
+ /* IN variables. */
+ uint64_aligned_t num;
+ /* IN/OUT variables. */
+ XEN_GUEST_HANDLE_64(uint32) array;
+};
+typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
+
+/* XEN_DOMCTL_getpageframeinfo3 */
+struct xen_domctl_getpageframeinfo3 {
+ /* IN variables. */
+ uint64_aligned_t num;
+ /* IN/OUT variables. */
+ XEN_GUEST_HANDLE_64(xen_pfn_t) array;
+};
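+
+/* Usage sketch (editor's addition): decoding one entry returned in the
+ * getpageframeinfo3 array, where the type is packed into the top bits.
+ *
+ *     xen_pfn_t e = array[i];
+ *     switch (e & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) {
+ *     case XEN_DOMCTL_PFINFO_L1TAB: ...   // frame pinned as an L1 table
+ *     case XEN_DOMCTL_PFINFO_NOTAB: ...   // ordinary frame
+ *     }
+ *     if (e & XEN_DOMCTL_PFINFO_LPINTAB) ...
+ */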
+
+
+/*
+ * Control shadow pagetables operation
+ */
+/* XEN_DOMCTL_shadow_op */
+
+/* Disable shadow mode. */
+#define XEN_DOMCTL_SHADOW_OP_OFF 0
+
+/* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE 32
+
+/* Log-dirty bitmap operations. */
+ /* Return the bitmap and clean internal copy for next round. */
+#define XEN_DOMCTL_SHADOW_OP_CLEAN 11
+ /* Return the bitmap but do not modify internal copy. */
+#define XEN_DOMCTL_SHADOW_OP_PEEK 12
+
+/* Memory allocation accessors. */
+#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION 30
+#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION 31
+
+/* Legacy enable operations. */
+ /* Equiv. to ENABLE with no mode flags. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST 1
+ /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY 2
+ /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE 3
+
+/* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */
+ /*
+ * Shadow pagetables are refcounted: guest does not use explicit mmu
+ * operations nor write-protect its pagetables.
+ */
+#define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT (1 << 1)
+ /*
+ * Log pages in a bitmap as they are dirtied.
+ * Used for live relocation to determine which pages must be re-sent.
+ */
+#define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2)
+ /*
+ * Automatically translate GPFNs into MFNs.
+ */
+#define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3)
+ /*
+ * Xen does not steal virtual address space from the guest.
+ * Requires HVM support.
+ */
+#define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL (1 << 4)
+
+struct xen_domctl_shadow_op_stats {
+ uint32_t fault_count;
+ uint32_t dirty_count;
+};
+typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t);
+
+struct xen_domctl_shadow_op {
+ /* IN variables. */
+ uint32_t op; /* XEN_DOMCTL_SHADOW_OP_* */
+
+ /* OP_ENABLE */
+ uint32_t mode; /* XEN_DOMCTL_SHADOW_ENABLE_* */
+
+ /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */
+ uint32_t mb; /* Shadow memory allocation in MB */
+
+ /* OP_PEEK / OP_CLEAN */
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap;
+ uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */
+ struct xen_domctl_shadow_op_stats stats;
+};
+typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t);
+
+
+/* XEN_DOMCTL_max_mem */
+struct xen_domctl_max_mem {
+ /* IN variables. */
+ uint64_aligned_t max_memkb;
+};
+typedef struct xen_domctl_max_mem xen_domctl_max_mem_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t);
+
+
+/* XEN_DOMCTL_setvcpucontext */
+/* XEN_DOMCTL_getvcpucontext */
+struct xen_domctl_vcpucontext {
+ uint32_t vcpu; /* IN */
+ XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
+
+
+/* XEN_DOMCTL_getvcpuinfo */
+struct xen_domctl_getvcpuinfo {
+ /* IN variables. */
+ uint32_t vcpu;
+ /* OUT variables. */
+ uint8_t online; /* currently online (not hotplugged)? */
+ uint8_t blocked; /* blocked waiting for an event? */
+ uint8_t running; /* currently scheduled on its CPU? */
+ uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */
+ uint32_t cpu; /* current mapping */
+};
+typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
+
+
+/* Get/set the NUMA node(s) with which the guest has affinity. */
+/* XEN_DOMCTL_setnodeaffinity */
+/* XEN_DOMCTL_getnodeaffinity */
+struct xen_domctl_nodeaffinity {
+ struct xenctl_bitmap nodemap;/* IN */
+};
+typedef struct xen_domctl_nodeaffinity xen_domctl_nodeaffinity_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_nodeaffinity_t);
+
+
+/* Get/set which physical cpus a vcpu can execute on. */
+/* XEN_DOMCTL_setvcpuaffinity */
+/* XEN_DOMCTL_getvcpuaffinity */
+struct xen_domctl_vcpuaffinity {
+ uint32_t vcpu; /* IN */
+ struct xenctl_bitmap cpumap; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t);
+
+
+/* XEN_DOMCTL_max_vcpus */
+struct xen_domctl_max_vcpus {
+ uint32_t max; /* maximum number of vcpus */
+};
+typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);
+
+
+/* XEN_DOMCTL_scheduler_op */
+/* Scheduler types. */
+#define XEN_SCHEDULER_SEDF 4
+#define XEN_SCHEDULER_CREDIT 5
+#define XEN_SCHEDULER_CREDIT2 6
+#define XEN_SCHEDULER_ARINC653 7
+/* Set or get info? */
+#define XEN_DOMCTL_SCHEDOP_putinfo 0
+#define XEN_DOMCTL_SCHEDOP_getinfo 1
+struct xen_domctl_scheduler_op {
+ uint32_t sched_id; /* XEN_SCHEDULER_* */
+ uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */
+ union {
+ struct xen_domctl_sched_sedf {
+ uint64_aligned_t period;
+ uint64_aligned_t slice;
+ uint64_aligned_t latency;
+ uint32_t extratime;
+ uint32_t weight;
+ } sedf;
+ struct xen_domctl_sched_credit {
+ uint16_t weight;
+ uint16_t cap;
+ } credit;
+ struct xen_domctl_sched_credit2 {
+ uint16_t weight;
+ } credit2;
+ } u;
+};
+typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t);
+
+
+/* XEN_DOMCTL_setdomainhandle */
+struct xen_domctl_setdomainhandle {
+ xen_domain_handle_t handle;
+};
+typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t);
+
+
+/* XEN_DOMCTL_setdebugging */
+struct xen_domctl_setdebugging {
+ uint8_t enable;
+};
+typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t);
+
+
+/* XEN_DOMCTL_irq_permission */
+struct xen_domctl_irq_permission {
+ uint8_t pirq;
+ uint8_t allow_access; /* flag to specify enable/disable of IRQ access */
+};
+typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t);
+
+
+/* XEN_DOMCTL_iomem_permission */
+struct xen_domctl_iomem_permission {
+ uint64_aligned_t first_mfn;/* first page (physical page number) in range */
+ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */
+ uint8_t allow_access; /* allow (!0) or deny (0) access to range? */
+};
+typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t);
+
+
+/* XEN_DOMCTL_ioport_permission */
+struct xen_domctl_ioport_permission {
+ uint32_t first_port; /* first port in range */
+ uint32_t nr_ports; /* size of port range */
+ uint8_t allow_access; /* allow or deny access to range? */
+};
+typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t);
+
+
+/* XEN_DOMCTL_hypercall_init */
+struct xen_domctl_hypercall_init {
+ uint64_aligned_t gmfn; /* GMFN to be initialised */
+};
+typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t);
+
+
+/* XEN_DOMCTL_arch_setup */
+#define _XEN_DOMAINSETUP_hvm_guest 0
+#define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest)
+#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */
+#define XEN_DOMAINSETUP_query (1UL<<_XEN_DOMAINSETUP_query)
+#define _XEN_DOMAINSETUP_sioemu_guest 2
+#define XEN_DOMAINSETUP_sioemu_guest (1UL<<_XEN_DOMAINSETUP_sioemu_guest)
+typedef struct xen_domctl_arch_setup {
+ uint64_aligned_t flags; /* XEN_DOMAINSETUP_* */
+} xen_domctl_arch_setup_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t);
+
+
+/* XEN_DOMCTL_settimeoffset */
+struct xen_domctl_settimeoffset {
+ int32_t time_offset_seconds; /* applied to domain wallclock time */
+};
+typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
+
+/* XEN_DOMCTL_gethvmcontext */
+/* XEN_DOMCTL_sethvmcontext */
+typedef struct xen_domctl_hvmcontext {
+ uint32_t size; /* IN/OUT: size of buffer / bytes filled */
+ XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call
+ * gethvmcontext with NULL
+ * buffer to get size req'd */
+} xen_domctl_hvmcontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t);
+
+
+/* XEN_DOMCTL_set_address_size */
+/* XEN_DOMCTL_get_address_size */
+typedef struct xen_domctl_address_size {
+ uint32_t size;
+} xen_domctl_address_size_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t);
+
+
+/* XEN_DOMCTL_real_mode_area */
+struct xen_domctl_real_mode_area {
+ uint32_t log; /* log2 of Real Mode Area size */
+};
+typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
+
+
+/* XEN_DOMCTL_sendtrigger */
+#define XEN_DOMCTL_SENDTRIGGER_NMI 0
+#define XEN_DOMCTL_SENDTRIGGER_RESET 1
+#define XEN_DOMCTL_SENDTRIGGER_INIT 2
+#define XEN_DOMCTL_SENDTRIGGER_POWER 3
+#define XEN_DOMCTL_SENDTRIGGER_SLEEP 4
+struct xen_domctl_sendtrigger {
+ uint32_t trigger; /* IN */
+ uint32_t vcpu; /* IN */
+};
+typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t);
+
+
+/* Assign PCI device to HVM guest. Sets up IOMMU structures. */
+/* XEN_DOMCTL_assign_device */
+/* XEN_DOMCTL_test_assign_device */
+/* XEN_DOMCTL_deassign_device */
+struct xen_domctl_assign_device {
+ uint32_t machine_sbdf; /* machine PCI ID of assigned device */
+};
+typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
+
+/* Retrieve sibling device information for machine_sbdf */
+/* XEN_DOMCTL_get_device_group */
+struct xen_domctl_get_device_group {
+ uint32_t machine_sbdf; /* IN */
+ uint32_t max_sdevs; /* IN */
+ uint32_t num_sdevs; /* OUT */
+ XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */
+};
+typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
+
+/* Pass-through interrupts: bind real irq -> hvm devfn. */
+/* XEN_DOMCTL_bind_pt_irq */
+/* XEN_DOMCTL_unbind_pt_irq */
+typedef enum pt_irq_type_e {
+ PT_IRQ_TYPE_PCI,
+ PT_IRQ_TYPE_ISA,
+ PT_IRQ_TYPE_MSI,
+ PT_IRQ_TYPE_MSI_TRANSLATE,
+} pt_irq_type_t;
+struct xen_domctl_bind_pt_irq {
+ uint32_t machine_irq;
+ pt_irq_type_t irq_type;
+ uint32_t hvm_domid;
+
+ union {
+ struct {
+ uint8_t isa_irq;
+ } isa;
+ struct {
+ uint8_t bus;
+ uint8_t device;
+ uint8_t intx;
+ } pci;
+ struct {
+ uint8_t gvec;
+ uint32_t gflags;
+ uint64_aligned_t gtable;
+ } msi;
+ } u;
+};
+typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t);
+
+
+/* Bind machine I/O address range -> HVM address range. */
+/* XEN_DOMCTL_memory_mapping */
+#define DPCI_ADD_MAPPING 1
+#define DPCI_REMOVE_MAPPING 0
+struct xen_domctl_memory_mapping {
+ uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */
+ uint64_aligned_t first_mfn; /* first page (machine page) in range */
+ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */
+ uint32_t add_mapping; /* add or remove mapping */
+ uint32_t padding; /* padding for 64-bit aligned structure */
+};
+typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t);
+
+
+/* Bind machine I/O port range -> HVM I/O port range. */
+/* XEN_DOMCTL_ioport_mapping */
+struct xen_domctl_ioport_mapping {
+ uint32_t first_gport; /* first guest IO port */
+ uint32_t first_mport; /* first machine IO port */
+ uint32_t nr_ports; /* size of port range */
+ uint32_t add_mapping; /* add or remove mapping */
+};
+typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t);
+
+
+/*
+ * Pin caching type of RAM space for x86 HVM domU.
+ */
+/* XEN_DOMCTL_pin_mem_cacheattr */
+/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */
+#define XEN_DOMCTL_MEM_CACHEATTR_UC 0
+#define XEN_DOMCTL_MEM_CACHEATTR_WC 1
+#define XEN_DOMCTL_MEM_CACHEATTR_WT 4
+#define XEN_DOMCTL_MEM_CACHEATTR_WP 5
+#define XEN_DOMCTL_MEM_CACHEATTR_WB 6
+#define XEN_DOMCTL_MEM_CACHEATTR_UCM 7
+struct xen_domctl_pin_mem_cacheattr {
+ uint64_aligned_t start, end;
+ uint32_t type; /* XEN_DOMCTL_MEM_CACHEATTR_* */
+};
+typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t);
+
+
+/* XEN_DOMCTL_set_ext_vcpucontext */
+/* XEN_DOMCTL_get_ext_vcpucontext */
+struct xen_domctl_ext_vcpucontext {
+ /* IN: VCPU that this call applies to. */
+ uint32_t vcpu;
+ /*
+ * SET: Size of struct (IN)
+ * GET: Size of struct (OUT, up to 128 bytes)
+ */
+ uint32_t size;
+#if defined(__i386__) || defined(__x86_64__)
+ /* SYSCALL from 32-bit mode and SYSENTER callback information. */
+ /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */
+ uint64_aligned_t syscall32_callback_eip;
+ uint64_aligned_t sysenter_callback_eip;
+ uint16_t syscall32_callback_cs;
+ uint16_t sysenter_callback_cs;
+ uint8_t syscall32_disables_events;
+ uint8_t sysenter_disables_events;
+#if defined(__GNUC__)
+ union {
+ uint64_aligned_t mcg_cap;
+ struct hvm_vmce_vcpu vmce;
+ };
+#else
+ struct hvm_vmce_vcpu vmce;
+#endif
+#endif
+};
+typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t);
+
+/*
+ * Set the target domain for a domain
+ */
+/* XEN_DOMCTL_set_target */
+struct xen_domctl_set_target {
+ domid_t target;
+};
+typedef struct xen_domctl_set_target xen_domctl_set_target_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t);
+
+#if defined(__i386__) || defined(__x86_64__)
+# define XEN_CPUID_INPUT_UNUSED 0xFFFFFFFF
+/* XEN_DOMCTL_set_cpuid */
+struct xen_domctl_cpuid {
+ uint32_t input[2];
+ uint32_t eax;
+ uint32_t ebx;
+ uint32_t ecx;
+ uint32_t edx;
+};
+typedef struct xen_domctl_cpuid xen_domctl_cpuid_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t);
+#endif
+
+/* XEN_DOMCTL_subscribe */
+struct xen_domctl_subscribe {
+ uint32_t port; /* IN */
+};
+typedef struct xen_domctl_subscribe xen_domctl_subscribe_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t);
+
+/*
+ * Define the maximum machine address size which should be allocated
+ * to a guest.
+ */
+/* XEN_DOMCTL_set_machine_address_size */
+/* XEN_DOMCTL_get_machine_address_size */
+
+/*
+ * Do not inject spurious page faults into this domain.
+ */
+/* XEN_DOMCTL_suppress_spurious_page_faults */
+
+/* XEN_DOMCTL_debug_op */
+#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF 0
+#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON 1
+struct xen_domctl_debug_op {
+ uint32_t op; /* IN */
+ uint32_t vcpu; /* IN */
+};
+typedef struct xen_domctl_debug_op xen_domctl_debug_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t);
+
+/*
+ * Request a particular record from the HVM context
+ */
+/* XEN_DOMCTL_gethvmcontext_partial */
+typedef struct xen_domctl_hvmcontext_partial {
+ uint32_t type; /* IN: Type of record required */
+ uint32_t instance; /* IN: Instance of that type */
+ XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */
+} xen_domctl_hvmcontext_partial_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t);
+
+/* XEN_DOMCTL_disable_migrate */
+typedef struct xen_domctl_disable_migrate {
+ uint32_t disable; /* IN: 1: disable migration and restore */
+} xen_domctl_disable_migrate_t;
+
+
+/* XEN_DOMCTL_gettscinfo */
+/* XEN_DOMCTL_settscinfo */
+struct xen_guest_tsc_info {
+ uint32_t tsc_mode;
+ uint32_t gtsc_khz;
+ uint32_t incarnation;
+ uint32_t pad;
+ uint64_aligned_t elapsed_nsec;
+};
+typedef struct xen_guest_tsc_info xen_guest_tsc_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_guest_tsc_info_t);
+typedef struct xen_domctl_tsc_info {
+ XEN_GUEST_HANDLE_64(xen_guest_tsc_info_t) out_info; /* OUT */
+ xen_guest_tsc_info_t info; /* IN */
+} xen_domctl_tsc_info_t;
+
+/* XEN_DOMCTL_gdbsx_guestmemio guest mem io */
+struct xen_domctl_gdbsx_memio {
+ /* IN */
+ uint64_aligned_t pgd3val;/* optional: init_mm.pgd[3] value */
+ uint64_aligned_t gva; /* guest virtual address */
+ uint64_aligned_t uva; /* user buffer virtual address */
+ uint32_t len; /* number of bytes to read/write */
+ uint8_t gwr; /* 0 = read from guest. 1 = write to guest */
+ /* OUT */
+ uint32_t remain; /* bytes remaining to be copied */
+};
+
+/* XEN_DOMCTL_gdbsx_pausevcpu */
+/* XEN_DOMCTL_gdbsx_unpausevcpu */
+struct xen_domctl_gdbsx_pauseunp_vcpu { /* pause/unpause a vcpu */
+ uint32_t vcpu; /* which vcpu */
+};
+
+/* XEN_DOMCTL_gdbsx_domstatus */
+struct xen_domctl_gdbsx_domstatus {
+ /* OUT */
+ uint8_t paused; /* is the domain paused */
+ uint32_t vcpu_id; /* any vcpu in an event? */
+ uint32_t vcpu_ev; /* if yes, what event? */
+};
+
+/*
+ * Memory event operations
+ */
+
+/* XEN_DOMCTL_mem_event_op */
+
+/*
+ * Domain memory paging
+ * Page memory in and out.
+ * Domctl interface to set up and tear down the
+ * pager<->hypervisor interface. Use XENMEM_paging_op*
+ * to perform per-page operations.
+ *
+ * The XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE domctl returns several
+ * non-standard error codes to indicate why paging could not be enabled:
+ * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
+ * EMLINK - guest has iommu passthrough enabled
+ * EXDEV - guest has PoD enabled
+ * EBUSY - guest has or had paging enabled, ring buffer still active
+ */
+#define XEN_DOMCTL_MEM_EVENT_OP_PAGING 1
+
+#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE 0
+#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE 1
+
+/*
+ * Access permissions.
+ *
+ * As with paging, use the domctl for teardown/setup of the
+ * helper<->hypervisor interface.
+ *
+ * There are HVM hypercalls to set the per-page access permissions of every
+ * page in a domain. When one of these permissions--independent, read,
+ * write, and execute--is violated, the VCPU is paused and a memory event
+ * is sent describing what happened (see public/mem_event.h).
+ *
+ * The memory event handler can then resume the VCPU and redo the access
+ * with a XENMEM_access_op_resume hypercall.
+ *
+ * The XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE domctl returns several
+ * non-standard error codes to indicate why access could not be enabled:
+ * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
+ * EBUSY - guest has or had access enabled, ring buffer still active
+ */
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS 2
+
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE 0
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE 1
+
+/*
+ * Sharing ENOMEM helper.
+ *
+ * As with paging, use the domctl for teardown/setup of the
+ * helper<->hypervisor interface.
+ *
+ * If setup, this ring is used to communicate failed allocations
+ * in the unshare path. XENMEM_sharing_op_resume is used to wake up
+ * vcpus that could not unshare.
+ *
+ * Note that sharing can be turned on (as per the domctl below)
+ * *without* this ring being setup.
+ */
+#define XEN_DOMCTL_MEM_EVENT_OP_SHARING 3
+
+#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_ENABLE 0
+#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DISABLE 1
+
+/* Use for teardown/setup of helper<->hypervisor interface for paging,
+ * access and sharing.*/
+struct xen_domctl_mem_event_op {
+ uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */
+ uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */
+
+ uint32_t port; /* OUT: event channel for ring */
+};
+typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t);
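+
+/*
+ * Illustrative sketch (not part of the interface): enabling the paging
+ * helper ring, again via the xen_domctl wrapper defined at the end of
+ * this header (cmd = XEN_DOMCTL_mem_event_op).
+ *
+ *     struct xen_domctl_mem_event_op *me = &domctl.u.mem_event_op;
+ *     me->mode = XEN_DOMCTL_MEM_EVENT_OP_PAGING;
+ *     me->op = XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE;
+ *     // on success the hypervisor fills in me->port (ring event channel)
+ */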
+
+/*
+ * Memory sharing operations
+ */
+/* XEN_DOMCTL_mem_sharing_op.
+ * The CONTROL sub-domctl is used for bringup/teardown. */
+#define XEN_DOMCTL_MEM_SHARING_CONTROL 0
+
+struct xen_domctl_mem_sharing_op {
+ uint8_t op; /* XEN_DOMCTL_MEM_SHARING_* */
+
+ union {
+ uint8_t enable; /* CONTROL */
+ } u;
+};
+typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t);
+
+struct xen_domctl_audit_p2m {
+ /* OUT error counts */
+ uint64_t orphans;
+ uint64_t m2p_bad;
+ uint64_t p2m_bad;
+};
+typedef struct xen_domctl_audit_p2m xen_domctl_audit_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_audit_p2m_t);
+
+struct xen_domctl_set_virq_handler {
+ uint32_t virq; /* IN */
+};
+typedef struct xen_domctl_set_virq_handler xen_domctl_set_virq_handler_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_virq_handler_t);
+
+#if defined(__i386__) || defined(__x86_64__)
+/* XEN_DOMCTL_setvcpuextstate */
+/* XEN_DOMCTL_getvcpuextstate */
+struct xen_domctl_vcpuextstate {
+ /* IN: VCPU that this call applies to. */
+ uint32_t vcpu;
+ /*
+ * SET: xfeature support mask of struct (IN)
+ * GET: xfeature support mask of struct (IN/OUT)
+ * The xfeature mask identifies the saving format, so that a
+ * compatible CPU can check the format and decide whether it can
+ * restore the state.
+ */
+ uint64_aligned_t xfeature_mask;
+ /*
+ * SET: Size of struct (IN)
+ * GET: Size of struct (IN/OUT)
+ */
+ uint64_aligned_t size;
+ XEN_GUEST_HANDLE_64(uint64) buffer;
+};
+typedef struct xen_domctl_vcpuextstate xen_domctl_vcpuextstate_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t);
+#endif
+
+/* XEN_DOMCTL_set_access_required: sets whether a memory event listener
+ * must be present to handle page access events: if false, the page
+ * access will revert to full permissions if no one is listening.
+ */
+struct xen_domctl_set_access_required {
+ uint8_t access_required;
+};
+typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);
+
+struct xen_domctl_set_broken_page_p2m {
+ uint64_aligned_t pfn;
+};
+typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
+
+struct xen_domctl {
+ uint32_t cmd;
+#define XEN_DOMCTL_createdomain 1
+#define XEN_DOMCTL_destroydomain 2
+#define XEN_DOMCTL_pausedomain 3
+#define XEN_DOMCTL_unpausedomain 4
+#define XEN_DOMCTL_getdomaininfo 5
+#define XEN_DOMCTL_getmemlist 6
+#define XEN_DOMCTL_getpageframeinfo 7
+#define XEN_DOMCTL_getpageframeinfo2 8
+#define XEN_DOMCTL_setvcpuaffinity 9
+#define XEN_DOMCTL_shadow_op 10
+#define XEN_DOMCTL_max_mem 11
+#define XEN_DOMCTL_setvcpucontext 12
+#define XEN_DOMCTL_getvcpucontext 13
+#define XEN_DOMCTL_getvcpuinfo 14
+#define XEN_DOMCTL_max_vcpus 15
+#define XEN_DOMCTL_scheduler_op 16
+#define XEN_DOMCTL_setdomainhandle 17
+#define XEN_DOMCTL_setdebugging 18
+#define XEN_DOMCTL_irq_permission 19
+#define XEN_DOMCTL_iomem_permission 20
+#define XEN_DOMCTL_ioport_permission 21
+#define XEN_DOMCTL_hypercall_init 22
+#define XEN_DOMCTL_arch_setup 23
+#define XEN_DOMCTL_settimeoffset 24
+#define XEN_DOMCTL_getvcpuaffinity 25
+#define XEN_DOMCTL_real_mode_area 26
+#define XEN_DOMCTL_resumedomain 27
+#define XEN_DOMCTL_sendtrigger 28
+#define XEN_DOMCTL_subscribe 29
+#define XEN_DOMCTL_gethvmcontext 33
+#define XEN_DOMCTL_sethvmcontext 34
+#define XEN_DOMCTL_set_address_size 35
+#define XEN_DOMCTL_get_address_size 36
+#define XEN_DOMCTL_assign_device 37
+#define XEN_DOMCTL_bind_pt_irq 38
+#define XEN_DOMCTL_memory_mapping 39
+#define XEN_DOMCTL_ioport_mapping 40
+#define XEN_DOMCTL_pin_mem_cacheattr 41
+#define XEN_DOMCTL_set_ext_vcpucontext 42
+#define XEN_DOMCTL_get_ext_vcpucontext 43
+#define XEN_DOMCTL_set_opt_feature 44 /* Obsolete IA64 only */
+#define XEN_DOMCTL_test_assign_device 45
+#define XEN_DOMCTL_set_target 46
+#define XEN_DOMCTL_deassign_device 47
+#define XEN_DOMCTL_unbind_pt_irq 48
+#define XEN_DOMCTL_set_cpuid 49
+#define XEN_DOMCTL_get_device_group 50
+#define XEN_DOMCTL_set_machine_address_size 51
+#define XEN_DOMCTL_get_machine_address_size 52
+#define XEN_DOMCTL_suppress_spurious_page_faults 53
+#define XEN_DOMCTL_debug_op 54
+#define XEN_DOMCTL_gethvmcontext_partial 55
+#define XEN_DOMCTL_mem_event_op 56
+#define XEN_DOMCTL_mem_sharing_op 57
+#define XEN_DOMCTL_disable_migrate 58
+#define XEN_DOMCTL_gettscinfo 59
+#define XEN_DOMCTL_settscinfo 60
+#define XEN_DOMCTL_getpageframeinfo3 61
+#define XEN_DOMCTL_setvcpuextstate 62
+#define XEN_DOMCTL_getvcpuextstate 63
+#define XEN_DOMCTL_set_access_required 64
+#define XEN_DOMCTL_audit_p2m 65
+#define XEN_DOMCTL_set_virq_handler 66
+#define XEN_DOMCTL_set_broken_page_p2m 67
+#define XEN_DOMCTL_setnodeaffinity 68
+#define XEN_DOMCTL_getnodeaffinity 69
+#define XEN_DOMCTL_gdbsx_guestmemio 1000
+#define XEN_DOMCTL_gdbsx_pausevcpu 1001
+#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
+#define XEN_DOMCTL_gdbsx_domstatus 1003
+ uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
+ domid_t domain;
+ union {
+ struct xen_domctl_createdomain createdomain;
+ struct xen_domctl_getdomaininfo getdomaininfo;
+ struct xen_domctl_getmemlist getmemlist;
+ struct xen_domctl_getpageframeinfo getpageframeinfo;
+ struct xen_domctl_getpageframeinfo2 getpageframeinfo2;
+ struct xen_domctl_getpageframeinfo3 getpageframeinfo3;
+ struct xen_domctl_nodeaffinity nodeaffinity;
+ struct xen_domctl_vcpuaffinity vcpuaffinity;
+ struct xen_domctl_shadow_op shadow_op;
+ struct xen_domctl_max_mem max_mem;
+ struct xen_domctl_vcpucontext vcpucontext;
+ struct xen_domctl_getvcpuinfo getvcpuinfo;
+ struct xen_domctl_max_vcpus max_vcpus;
+ struct xen_domctl_scheduler_op scheduler_op;
+ struct xen_domctl_setdomainhandle setdomainhandle;
+ struct xen_domctl_setdebugging setdebugging;
+ struct xen_domctl_irq_permission irq_permission;
+ struct xen_domctl_iomem_permission iomem_permission;
+ struct xen_domctl_ioport_permission ioport_permission;
+ struct xen_domctl_hypercall_init hypercall_init;
+ struct xen_domctl_arch_setup arch_setup;
+ struct xen_domctl_settimeoffset settimeoffset;
+ struct xen_domctl_disable_migrate disable_migrate;
+ struct xen_domctl_tsc_info tsc_info;
+ struct xen_domctl_real_mode_area real_mode_area;
+ struct xen_domctl_hvmcontext hvmcontext;
+ struct xen_domctl_hvmcontext_partial hvmcontext_partial;
+ struct xen_domctl_address_size address_size;
+ struct xen_domctl_sendtrigger sendtrigger;
+ struct xen_domctl_get_device_group get_device_group;
+ struct xen_domctl_assign_device assign_device;
+ struct xen_domctl_bind_pt_irq bind_pt_irq;
+ struct xen_domctl_memory_mapping memory_mapping;
+ struct xen_domctl_ioport_mapping ioport_mapping;
+ struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr;
+ struct xen_domctl_ext_vcpucontext ext_vcpucontext;
+ struct xen_domctl_set_target set_target;
+ struct xen_domctl_subscribe subscribe;
+ struct xen_domctl_debug_op debug_op;
+ struct xen_domctl_mem_event_op mem_event_op;
+ struct xen_domctl_mem_sharing_op mem_sharing_op;
+#if defined(__i386__) || defined(__x86_64__)
+ struct xen_domctl_cpuid cpuid;
+ struct xen_domctl_vcpuextstate vcpuextstate;
+#endif
+ struct xen_domctl_set_access_required access_required;
+ struct xen_domctl_audit_p2m audit_p2m;
+ struct xen_domctl_set_virq_handler set_virq_handler;
+ struct xen_domctl_gdbsx_memio gdbsx_guest_memio;
+ struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
+ struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
+ struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
+ uint8_t pad[128];
+ } u;
+};
+typedef struct xen_domctl xen_domctl_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
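+
+/*
+ * Illustrative sketch (not part of the interface): pausing a domain.
+ * Assumes a HYPERVISOR_domctl() hypercall wrapper; only a sufficiently
+ * privileged (toolstack) domain may issue domctls.
+ *
+ *     xen_domctl_t d;
+ *     memset(&d, 0, sizeof d);
+ *     d.cmd = XEN_DOMCTL_pausedomain;
+ *     d.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
+ *     d.domain = domid;
+ *     if(HYPERVISOR_domctl(&d) < 0)
+ *         error("pausedomain failed");
+ */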
+
+#endif /* __XEN_PUBLIC_DOMCTL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/elfnote.h
@@ -1,0 +1,271 @@
+/******************************************************************************
+ * elfnote.h
+ *
+ * Definitions used for the Xen ELF notes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Ian Campbell, XenSource Ltd.
+ */
+
+#ifndef __XEN_PUBLIC_ELFNOTE_H__
+#define __XEN_PUBLIC_ELFNOTE_H__
+
+/*
+ * `incontents 200 elfnotes ELF notes
+ *
+ * The notes should live in a PT_NOTE segment and have "Xen" in the
+ * name field.
+ *
+ * Numeric types are either 4 or 8 bytes depending on the content of
+ * the desc field.
+ *
+ * LEGACY indicates the fields in the legacy __xen_guest string which
+ * this note type replaces.
+ *
+ * String values (for non-legacy) are NULL terminated ASCII, also known
+ * as ASCIZ type.
+ */
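+
+/*
+ * Illustrative sketch of the framing of one such note (standard ELF
+ * note layout; the name is "Xen"):
+ *
+ *     uint32_t namesz;    // 4: "Xen" plus terminating NUL
+ *     uint32_t descsz;    // length of the value that follows
+ *     uint32_t type;      // one of the XEN_ELFNOTE_* values below
+ *     char name[4];       // "Xen\0"
+ *     ... desc ...        // numeric or ASCIZ value, 4-byte padded
+ */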
+
+/*
+ * NAME=VALUE pair (string).
+ */
+#define XEN_ELFNOTE_INFO 0
+
+/*
+ * The virtual address of the entry point (numeric).
+ *
+ * LEGACY: VIRT_ENTRY
+ */
+#define XEN_ELFNOTE_ENTRY 1
+
+/* The virtual address of the hypercall transfer page (numeric).
+ *
+ * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page
+ * number not a virtual address)
+ */
+#define XEN_ELFNOTE_HYPERCALL_PAGE 2
+
+/* The virtual address where the kernel image should be mapped (numeric).
+ *
+ * Defaults to 0.
+ *
+ * LEGACY: VIRT_BASE
+ */
+#define XEN_ELFNOTE_VIRT_BASE 3
+
+/*
+ * The offset of the ELF paddr field from the actual required
+ * pseudo-physical address (numeric).
+ *
+ * This is used to maintain backwards compatibility with older kernels
+ * which wrote __PAGE_OFFSET into that field. This field defaults to 0
+ * if not present.
+ *
+ * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE)
+ */
+#define XEN_ELFNOTE_PADDR_OFFSET 4
+
+/*
+ * The version of Xen that we work with (string).
+ *
+ * LEGACY: XEN_VER
+ */
+#define XEN_ELFNOTE_XEN_VERSION 5
+
+/*
+ * The name of the guest operating system (string).
+ *
+ * LEGACY: GUEST_OS
+ */
+#define XEN_ELFNOTE_GUEST_OS 6
+
+/*
+ * The version of the guest operating system (string).
+ *
+ * LEGACY: GUEST_VER
+ */
+#define XEN_ELFNOTE_GUEST_VERSION 7
+
+/*
+ * The loader type (string).
+ *
+ * LEGACY: LOADER
+ */
+#define XEN_ELFNOTE_LOADER 8
+
+/*
+ * The kernel supports PAE (x86/32 only, string = "yes", "no" or
+ * "bimodal").
+ *
+ * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting
+ * may be given as "yes,bimodal" which will cause older Xen to treat
+ * this kernel as PAE.
+ *
+ * LEGACY: PAE (n.b. The legacy interface included a provision to
+ * indicate 'extended-cr3' support allowing L3 page tables to be
+ * placed above 4G. It is assumed that any kernel new enough to use
+ * these ELF notes will include this and therefore "yes" here is
+ * equivalent to "yes[entended-cr3]" in the __xen_guest interface.
+ */
+#define XEN_ELFNOTE_PAE_MODE 9
+
+/*
+ * The features supported/required by this kernel (string).
+ *
+ * The string must consist of a list of feature names (as given in
+ * features.h, without the "XENFEAT_" prefix) separated by '|'
+ * characters. If a feature is required for the kernel to function
+ * then the feature name must be preceded by a '!' character.
+ *
+ * LEGACY: FEATURES
+ */
+#define XEN_ELFNOTE_FEATURES 10
+
+/*
+ * The kernel requires the symbol table to be loaded (string = "yes" or "no")
+ * LEGACY: BSD_SYMTAB (n.b. The legacy interface treated the presence
+ * or absence of this string as a boolean flag rather than requiring
+ * "yes" or "no".)
+ */
+#define XEN_ELFNOTE_BSD_SYMTAB 11
+
+/*
+ * The lowest address the hypervisor hole can begin at (numeric).
+ *
+ * This must not be set higher than HYPERVISOR_VIRT_START. Its presence
+ * also indicates to the hypervisor that the kernel can deal with the
+ * hole starting at a higher address.
+ */
+#define XEN_ELFNOTE_HV_START_LOW 12
+
+/*
+ * List of maddr_t-sized mask/value pairs describing how to recognize
+ * (non-present) L1 page table entries carrying valid MFNs (numeric).
+ */
+#define XEN_ELFNOTE_L1_MFN_VALID 13
+
+/*
+ * Whether or not the guest supports cooperative suspend cancellation.
+ * This is a numeric value.
+ *
+ * Default is 0
+ */
+#define XEN_ELFNOTE_SUSPEND_CANCEL 14
+
+/*
+ * The (non-default) location the initial phys-to-machine map should be
+ * placed at by the hypervisor (Dom0) or the tools (DomU).
+ * The kernel must be prepared for this mapping to be established using
+ * large pages, despite such otherwise not being available to guests.
+ * The kernel must also be able to handle the page table pages used for
+ * this mapping not being accessible through the initial mapping.
+ * (Only x86-64 supports this at present.)
+ */
+#define XEN_ELFNOTE_INIT_P2M 15
+
+/*
+ * Whether or not the guest can deal with being passed an initrd not
+ * mapped through its initial page tables.
+ */
+#define XEN_ELFNOTE_MOD_START_PFN 16
+
+/*
+ * The features supported by this kernel (numeric).
+ *
+ * Unlike XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a
+ * kernel to specify support for features that older hypervisors don't
+ * know about. The set of features 4.2 and newer hypervisors will
+ * consider supported by the kernel is the combination of the sets
+ * specified through this and the string note.
+ *
+ * LEGACY: FEATURES
+ */
+#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
+
+/*
+ * The number of the highest elfnote defined.
+ */
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
+
+/*
+ * System information exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO
+ * note in case of a system crash. This note will contain various
+ * information about the system, see xen/include/xen/elfcore.h.
+ */
+#define XEN_ELFNOTE_CRASH_INFO 0x1000001
+
+/*
+ * System registers exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS
+ * note per cpu in case of a system crash. This note is architecture
+ * specific and will contain registers not saved in the "CORE" note.
+ * See xen/include/xen/elfcore.h for more information.
+ */
+#define XEN_ELFNOTE_CRASH_REGS 0x1000002
+
+
+/*
+ * xen dump-core none note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE
+ * in its dump file to indicate that the file is xen dump-core
+ * file. This note doesn't have any other information.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000
+
+/*
+ * xen dump-core header note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER
+ * in its dump file.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001
+
+/*
+ * xen dump-core xen version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION
+ * in its dump file. It contains the xen version obtained via the
+ * XENVER hypercall.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002
+
+/*
+ * xen dump-core format version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION
+ * in its dump file. It contains a format version identifier.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003
+
+#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/event_channel.h
@@ -1,0 +1,294 @@
+/******************************************************************************
+ * event_channel.h
+ *
+ * Event channels between domains.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, K A Fraser.
+ */
+
+#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
+#define __XEN_PUBLIC_EVENT_CHANNEL_H__
+
+#include "xen.h"
+
+/*
+ * `incontents 150 evtchn Event Channels
+ *
+ * Event channels are the basic primitive provided by Xen for event
+ * notifications. An event is the Xen equivalent of a hardware
+ * interrupt. They essentially store one bit of information; the event
+ * of interest is signalled by transitioning this bit from 0 to 1.
+ *
+ * Notifications are received by a guest via an upcall from Xen,
+ * indicating when an event arrives (setting the bit). Further
+ * notifications are masked until the bit is cleared again (therefore,
+ * guests must check the value of the bit after re-enabling event
+ * delivery to ensure no missed notifications).
+ *
+ * Event notifications can be masked by setting a flag; this is
+ * equivalent to disabling interrupts and can be used to ensure
+ * atomicity of certain operations in the guest kernel.
+ *
+ * Event channels are represented by the evtchn_* fields in
+ * struct shared_info and struct vcpu_info.
+ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args)
+ * `
+ * @cmd == EVTCHNOP_* (event-channel operation).
+ * @args == struct evtchn_* Operation-specific extra arguments (NULL if none).
+ */
+
+/* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */
+#define EVTCHNOP_bind_interdomain 0
+#define EVTCHNOP_bind_virq 1
+#define EVTCHNOP_bind_pirq 2
+#define EVTCHNOP_close 3
+#define EVTCHNOP_send 4
+#define EVTCHNOP_status 5
+#define EVTCHNOP_alloc_unbound 6
+#define EVTCHNOP_bind_ipi 7
+#define EVTCHNOP_bind_vcpu 8
+#define EVTCHNOP_unmask 9
+#define EVTCHNOP_reset 10
+/* ` } */
+
+typedef uint32_t evtchn_port_t;
+DEFINE_XEN_GUEST_HANDLE(evtchn_port_t);
+
+/*
+ * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
+ * accepting interdomain bindings from domain <remote_dom>. A fresh port
+ * is allocated in <dom> and returned as <port>.
+ * NOTES:
+ * 1. If the caller is unprivileged then <dom> must be DOMID_SELF.
+ * 2. <rdom> may be DOMID_SELF, allowing loopback connections.
+ */
+struct evtchn_alloc_unbound {
+ /* IN parameters */
+ domid_t dom, remote_dom;
+ /* OUT parameters */
+ evtchn_port_t port;
+};
+typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t;
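+
+/*
+ * Illustrative sketch (not part of the interface): allocating a port
+ * that dom0 may later bind, using the hypercall documented above.
+ *
+ *     struct evtchn_alloc_unbound op;
+ *     op.dom = DOMID_SELF;
+ *     op.remote_dom = 0;   // allow dom0 to bind the far end
+ *     if(HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op) == 0)
+ *         advertise(op.port);   // e.g. via xenstore (hypothetical helper)
+ */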
+
+/*
+ * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
+ * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
+ * a port that is unbound and marked as accepting bindings from the calling
+ * domain. A fresh port is allocated in the calling domain and returned as
+ * <local_port>.
+ * NOTES:
+ * 1. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+struct evtchn_bind_interdomain {
+ /* IN parameters. */
+ domid_t remote_dom;
+ evtchn_port_t remote_port;
+ /* OUT parameters. */
+ evtchn_port_t local_port;
+};
+typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t;
+
+/*
+ * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
+ * vcpu.
+ * NOTES:
+ * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list
+ * in xen.h for the classification of each VIRQ.
+ * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be
+ * re-bound via EVTCHNOP_bind_vcpu.
+ * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu.
+ * The allocated event channel is bound to the specified vcpu and the
+ * binding cannot be changed.
+ */
+struct evtchn_bind_virq {
+ /* IN parameters. */
+ uint32_t virq; /* enum virq */
+ uint32_t vcpu;
+ /* OUT parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_bind_virq evtchn_bind_virq_t;
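+
+/*
+ * Illustrative sketch: binding the per-vcpu timer VIRQ on vcpu 0
+ * (VIRQ_TIMER is defined in xen.h).
+ *
+ *     struct evtchn_bind_virq op;
+ *     op.virq = VIRQ_TIMER;
+ *     op.vcpu = 0;
+ *     if(HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &op) == 0)
+ *         timerport = op.port;   // deliver timer events on this channel
+ */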
+
+/*
+ * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ <irq>).
+ * NOTES:
+ * 1. A physical IRQ may be bound to at most one event channel per domain.
+ * 2. Only a sufficiently-privileged domain may bind to a physical IRQ.
+ */
+struct evtchn_bind_pirq {
+ /* IN parameters. */
+ uint32_t pirq;
+#define BIND_PIRQ__WILL_SHARE 1
+ uint32_t flags; /* BIND_PIRQ__* */
+ /* OUT parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_bind_pirq evtchn_bind_pirq_t;
+
+/*
+ * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
+ * NOTES:
+ * 1. The allocated event channel is bound to the specified vcpu. The binding
+ * may not be changed.
+ */
+struct evtchn_bind_ipi {
+ uint32_t vcpu;
+ /* OUT parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_bind_ipi evtchn_bind_ipi_t;
+
+/*
+ * EVTCHNOP_close: Close a local event channel <port>. If the channel is
+ * interdomain then the remote end is placed in the unbound state
+ * (EVTCHNSTAT_unbound), awaiting a new connection.
+ */
+struct evtchn_close {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_close evtchn_close_t;
+
+/*
+ * EVTCHNOP_send: Send an event to the remote end of the channel whose local
+ * endpoint is <port>.
+ */
+struct evtchn_send {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_send evtchn_send_t;
+
+/*
+ * EVTCHNOP_status: Get the current status of the communication channel which
+ * has an endpoint at <dom, port>.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may obtain the status of an event
+ * channel for which <dom> is not DOMID_SELF.
+ */
+struct evtchn_status {
+ /* IN parameters */
+ domid_t dom;
+ evtchn_port_t port;
+ /* OUT parameters */
+#define EVTCHNSTAT_closed 0 /* Channel is not in use. */
+#define EVTCHNSTAT_unbound 1 /* Channel is awaiting interdom connection.*/
+#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */
+#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */
+#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */
+#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */
+ uint32_t status;
+ uint32_t vcpu; /* VCPU to which this channel is bound. */
+ union {
+ struct {
+ domid_t dom;
+ } unbound; /* EVTCHNSTAT_unbound */
+ struct {
+ domid_t dom;
+ evtchn_port_t port;
+ } interdomain; /* EVTCHNSTAT_interdomain */
+ uint32_t pirq; /* EVTCHNSTAT_pirq */
+ uint32_t virq; /* EVTCHNSTAT_virq */
+ } u;
+};
+typedef struct evtchn_status evtchn_status_t;
+
+/*
+ * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
+ * event is pending.
+ * NOTES:
+ * 1. IPI-bound channels always notify the vcpu specified at bind time.
+ * This binding cannot be changed.
+ * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time.
+ * This binding cannot be changed.
+ * 3. All other channels notify vcpu0 by default. This default is set when
+ * the channel is allocated (a port that is freed and subsequently reused
+ * has its binding reset to vcpu0).
+ */
+struct evtchn_bind_vcpu {
+ /* IN parameters. */
+ evtchn_port_t port;
+ uint32_t vcpu;
+};
+typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t;
+
+/*
+ * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
+ * a notification to the appropriate VCPU if an event is pending.
+ */
+struct evtchn_unmask {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_unmask evtchn_unmask_t;
+
+/*
+ * EVTCHNOP_reset: Close all event channels associated with specified domain.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF.
+ */
+struct evtchn_reset {
+ /* IN parameters. */
+ domid_t dom;
+};
+typedef struct evtchn_reset evtchn_reset_t;
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op)
+ * `
+ * Superseded by the new event_channel_op() hypercall since 0x00030202.
+ */
+struct evtchn_op {
+ uint32_t cmd; /* enum event_channel_op */
+ union {
+ struct evtchn_alloc_unbound alloc_unbound;
+ struct evtchn_bind_interdomain bind_interdomain;
+ struct evtchn_bind_virq bind_virq;
+ struct evtchn_bind_pirq bind_pirq;
+ struct evtchn_bind_ipi bind_ipi;
+ struct evtchn_close close;
+ struct evtchn_send send;
+ struct evtchn_status status;
+ struct evtchn_bind_vcpu bind_vcpu;
+ struct evtchn_unmask unmask;
+ } u;
+};
+typedef struct evtchn_op evtchn_op_t;
+DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);
+
+#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/features.h
@@ -1,0 +1,109 @@
+/******************************************************************************
+ * features.h
+ *
+ * Feature flags, reported by XENVER_get_features.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_FEATURES_H__
+#define __XEN_PUBLIC_FEATURES_H__
+
+/*
+ * `incontents 200 elfnotes_features XEN_ELFNOTE_FEATURES
+ *
+ * The list of all the features the guest supports. They are set by
+ * parsing the XEN_ELFNOTE_FEATURES and XEN_ELFNOTE_SUPPORTED_FEATURES
+ * string. The format is the feature names (as given here without the
+ * "XENFEAT_" prefix) separated by '|' characters.
+ * If a feature is required for the kernel to function then the feature name
+ * must be preceded by a '!' character.
+ *
+ * Note that if XEN_ELFNOTE_SUPPORTED_FEATURES is used, then
+ * XENFEAT_dom0 MUST be set in it if the guest is to be booted as dom0.
+ */
+
+/*
+ * If set, the guest does not need to write-protect its pagetables, and can
+ * update them via direct writes.
+ */
+#define XENFEAT_writable_page_tables 0
+
+/*
+ * If set, the guest does not need to write-protect its segment descriptor
+ * tables, and can update them via direct writes.
+ */
+#define XENFEAT_writable_descriptor_tables 1
+
+/*
+ * If set, translation between the guest's 'pseudo-physical' address space
+ * and the host's machine address space is handled by the hypervisor. In this
+ * mode the guest does not need to perform phys-to/from-machine translations
+ * when performing page table operations.
+ */
+#define XENFEAT_auto_translated_physmap 2
+
+/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */
+#define XENFEAT_supervisor_mode_kernel 3
+
+/*
+ * If set, the guest does not need to allocate x86 PAE page directories
+ * below 4GB. This flag is usually implied by auto_translated_physmap.
+ */
+#define XENFEAT_pae_pgdir_above_4gb 4
+
+/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
+#define XENFEAT_mmu_pt_update_preserve_ad 5
+
+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
+#define XENFEAT_highmem_assist 6
+
+/*
+ * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
+ * available pte bits.
+ */
+#define XENFEAT_gnttab_map_avail_bits 7
+
+/* x86: Does this Xen host support the HVM callback vector type? */
+#define XENFEAT_hvm_callback_vector 8
+
+/* x86: pvclock algorithm is safe to use on HVM */
+#define XENFEAT_hvm_safe_pvclock 9
+
+/* x86: pirq can be used by HVM guests */
+#define XENFEAT_hvm_pirqs 10
+
+/* operation as Dom0 is supported */
+#define XENFEAT_dom0 11
+
+#define XENFEAT_NR_SUBMAPS 1
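+
+/*
+ * Illustrative sketch (not part of this header): reading the feature
+ * bitmap at boot.  Assumes xen_feature_info_t and XENVER_get_features
+ * from version.h and a HYPERVISOR_xen_version() wrapper.
+ *
+ *     xen_feature_info_t fi;
+ *     fi.submap_idx = 0;   // indices run 0..XENFEAT_NR_SUBMAPS-1
+ *     HYPERVISOR_xen_version(XENVER_get_features, &fi);
+ *     if(fi.submap & (1 << XENFEAT_writable_page_tables))
+ *         pteswritable = 1;   // pagetables may be updated directly
+ */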
+
+#endif /* __XEN_PUBLIC_FEATURES_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/gcov.h
@@ -1,0 +1,115 @@
+/******************************************************************************
+ * gcov.h
+ *
+ * Coverage structures exported by Xen.
+ * Structure is different from Gcc one.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2013, Citrix Systems R&D Ltd.
+ */
+
+#ifndef __XEN_PUBLIC_GCOV_H__
+#define __XEN_PUBLIC_GCOV_H__ __XEN_PUBLIC_GCOV_H__
+
+#define XENCOV_COUNTERS 5
+#define XENCOV_TAG_BASE 0x58544300u
+#define XENCOV_TAG_FILE (XENCOV_TAG_BASE+0x46u)
+#define XENCOV_TAG_FUNC (XENCOV_TAG_BASE+0x66u)
+#define XENCOV_TAG_COUNTER(n) (XENCOV_TAG_BASE+0x30u+((n)&0xfu))
+#define XENCOV_TAG_END (XENCOV_TAG_BASE+0x2eu)
+#define XENCOV_IS_TAG_COUNTER(n) \
+ ((n) >= XENCOV_TAG_COUNTER(0) && (n) < XENCOV_TAG_COUNTER(XENCOV_COUNTERS))
+#define XENCOV_COUNTER_NUM(n) ((n)-XENCOV_TAG_COUNTER(0))
+
+/*
+ * The main structure for the blob is
+ * BLOB := FILE.. END
+ * FILE := TAG_FILE VERSION STAMP FILENAME COUNTERS FUNCTIONS
+ * FILENAME := LEN characters
+ * characters are padded to 32 bit
+ * LEN := 32 bit value
+ * COUNTERS := TAG_COUNTER(n) NUM COUNTER..
+ * NUM := 32 bit value
+ * COUNTER := 64 bit value
+ * FUNCTIONS := TAG_FUNC NUM FUNCTION..
+ * FUNCTION := IDENT CHECKSUM NUM_COUNTERS
+ *
+ * All tagged structures are aligned to 8 bytes
+ */
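+
+/*
+ * Illustrative sketch of walking the blob tag by tag (next8(),
+ * parsefile() and parsecounter() are hypothetical helpers; next8()
+ * advances past the current structure to the next 8-byte boundary):
+ *
+ *     uint32_t *p = blob;
+ *     while(*p != XENCOV_TAG_END){
+ *         if(*p == XENCOV_TAG_FILE)
+ *             parsefile((struct xencov_file *)p);
+ *         else if(XENCOV_IS_TAG_COUNTER(*p))
+ *             parsecounter((struct xencov_counter *)p);
+ *         p = next8(p);
+ *     }
+ */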
+
+/**
+ * File information
+ * Prefixed with XENCOV_TAG_FILE and a string with filename
+ * Aligned to 8 bytes
+ */
+struct xencov_file
+{
+ uint32_t tag; /* XENCOV_TAG_FILE */
+ uint32_t version;
+ uint32_t stamp;
+ uint32_t fn_len;
+ char filename[1];
+};
+
+
+/**
+ * Counters information
+ * Prefixed with XENCOV_TAG_COUNTER(n) where n is 0..(XENCOV_COUNTERS-1)
+ * Aligned to 8 bytes
+ */
+struct xencov_counter
+{
+ uint32_t tag; /* XENCOV_TAG_COUNTER(n) */
+ uint32_t num;
+ uint64_t values[1];
+};
+
+/**
+ * Information for each function
+ * The number of counters equals the number of counter structures seen before
+ */
+struct xencov_function
+{
+ uint32_t ident;
+ uint32_t checksum;
+ uint32_t num_counters[1];
+};
+
+/**
+ * Information for all functions
+ * Aligned to 8 bytes
+ */
+struct xencov_functions
+{
+ uint32_t tag; /* XENCOV_TAG_FUNC */
+ uint32_t num;
+ struct xencov_function xencov_function[1];
+};
+
+/**
+ * Terminator
+ */
+struct xencov_end
+{
+ uint32_t tag; /* XENCOV_TAG_END */
+};
+
+#endif /* __XEN_PUBLIC_GCOV_H__ */
+
--- /dev/null
+++ b/sys/src/9/xen/xen-public/grant_table.h
@@ -1,0 +1,662 @@
+/******************************************************************************
+ * grant_table.h
+ *
+ * Interface for granting foreign access to page frames, and receiving
+ * page-ownership transfers.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
+#define __XEN_PUBLIC_GRANT_TABLE_H__
+
+#include "xen.h"
+
+/*
+ * `incontents 150 gnttab Grant Tables
+ *
+ * Xen's grant tables provide a generic mechanism to memory sharing
+ * between domains. This shared memory interface underpins the split
+ * device drivers for block and network IO.
+ *
+ * Each domain has its own grant table. This is a data structure that
+ * is shared with Xen; it allows the domain to tell Xen what kind of
+ * permissions other domains have on its pages. Entries in the grant
+ * table are identified by grant references. A grant reference is an
+ * integer, which indexes into the grant table. It acts as a
+ * capability which the grantee can use to perform operations on the
+ * granter's memory.
+ *
+ * This capability-based system allows shared-memory communications
+ * between unprivileged domains. A grant reference also encapsulates
+ * the details of a shared page, removing the need for a domain to
+ * know the real machine address of a page it is sharing. This makes
+ * it possible to share memory correctly with domains running in
+ * fully virtualised memory.
+ */
+
+/***********************************
+ * GRANT TABLE REPRESENTATION
+ */
+
+/* Some rough guidelines on accessing and updating grant-table entries
+ * in a concurrency-safe manner. For more information, Linux contains a
+ * reference implementation for guest OSes (drivers/xen/grant-table.c; see
+ * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD).
+ *
+ * NB. WMB is a no-op on current-generation x86 processors. However, a
+ * compiler barrier will still be required.
+ *
+ * Introducing a valid entry into the grant table:
+ * 1. Write ent->domid.
+ * 2. Write ent->frame:
+ * GTF_permit_access: Frame to which access is permitted.
+ * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ * frame, or zero if none.
+ * 3. Write memory barrier (WMB).
+ * 4. Write ent->flags, inc. valid type.
+ *
+ * Invalidating an unused GTF_permit_access entry:
+ * 1. flags = ent->flags.
+ * 2. Observe that !(flags & (GTF_reading|GTF_writing)).
+ * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ * NB. No need for WMB as reuse of entry is control-dependent on success of
+ * step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *
+ * Invalidating an in-use GTF_permit_access entry:
+ * This cannot be done directly. Request assistance from the domain controller
+ * which can set a timeout on the use of a grant entry and take necessary
+ * action. (NB. This is not yet implemented!).
+ *
+ * Invalidating an unused GTF_accept_transfer entry:
+ * 1. flags = ent->flags.
+ * 2. Observe that !(flags & GTF_transfer_committed). [*]
+ * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ * NB. No need for WMB as reuse of entry is control-dependent on success of
+ * step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ * [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
+ * The guest must /not/ modify the grant entry until the address of the
+ * transferred frame is written. It is safe for the guest to spin waiting
+ * for this to occur (detect by observing GTF_transfer_completed in
+ * ent->flags).
+ *
+ * Invalidating a committed GTF_accept_transfer entry:
+ * 1. Wait for (ent->flags & GTF_transfer_completed).
+ *
+ * Changing a GTF_permit_access from writable to read-only:
+ * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
+ *
+ * Changing a GTF_permit_access from read-only to writable:
+ * Use SMP-safe bit-setting instruction.
+ */
+
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
+/*
+ * A grant table comprises a packed array of grant entries in one or more
+ * page frames shared between Xen and a guest.
+ * [XEN]: This field is written by Xen and read by the sharing guest.
+ * [GST]: This field is written by the guest and read by Xen.
+ */
+
+/*
+ * Version 1 of the grant table entry structure is maintained purely
+ * for backwards compatibility. New guests should use version 2.
+ */
+#if __XEN_INTERFACE_VERSION__ < 0x0003020a
+#define grant_entry_v1 grant_entry
+#define grant_entry_v1_t grant_entry_t
+#endif
+struct grant_entry_v1 {
+ /* GTF_xxx: various type and flag information. [XEN,GST] */
+ uint16_t flags;
+ /* The domain being granted foreign privileges. [GST] */
+ domid_t domid;
+ /*
+ * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
+ * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
+ */
+ uint32_t frame;
+};
+typedef struct grant_entry_v1 grant_entry_v1_t;
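+
+/*
+ * Illustrative sketch: introducing a valid GTF_permit_access entry,
+ * following the concurrency rules above (domid, frame, write barrier,
+ * then flags).  Assumes gnttab points at the guest's mapped table and
+ * wmb() is whatever write-barrier primitive the guest provides.
+ *
+ *     grant_entry_v1_t *ent = &gnttab[ref];
+ *     ent->domid = remote_domid;
+ *     ent->frame = mfn;                 // frame being shared
+ *     wmb();                            // order frame/domid before flags
+ *     ent->flags = GTF_permit_access;   // or with GTF_readonly for RO
+ */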
+
+/* The first few grant table entries will be preserved across grant table
+ * version changes and may be pre-populated at domain creation by tools.
+ */
+#define GNTTAB_NR_RESERVED_ENTRIES 8
+#define GNTTAB_RESERVED_CONSOLE 0
+#define GNTTAB_RESERVED_XENSTORE 1
+
+/*
+ * Type of grant entry.
+ * GTF_invalid: This grant entry grants no privileges.
+ * GTF_permit_access: Allow @domid to map/access @frame.
+ * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
+ * to this guest. Xen writes the page number to @frame.
+ * GTF_transitive: Allow @domid to transitively access a subrange of
+ * @trans_grant in @trans_domid. No mappings are allowed.
+ */
+#define GTF_invalid (0U<<0)
+#define GTF_permit_access (1U<<0)
+#define GTF_accept_transfer (2U<<0)
+#define GTF_transitive (3U<<0)
+#define GTF_type_mask (3U<<0)
+
+/*
+ * Subflags for GTF_permit_access.
+ * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
+ * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
+ * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST]
+ * GTF_sub_page: Grant access to only a subrange of the page. @domid
+ * will only be allowed to copy from the grant, and not
+ * map it. [GST]
+ */
+#define _GTF_readonly (2)
+#define GTF_readonly (1U<<_GTF_readonly)
+#define _GTF_reading (3)
+#define GTF_reading (1U<<_GTF_reading)
+#define _GTF_writing (4)
+#define GTF_writing (1U<<_GTF_writing)
+#define _GTF_PWT (5)
+#define GTF_PWT (1U<<_GTF_PWT)
+#define _GTF_PCD (6)
+#define GTF_PCD (1U<<_GTF_PCD)
+#define _GTF_PAT (7)
+#define GTF_PAT (1U<<_GTF_PAT)
+#define _GTF_sub_page (8)
+#define GTF_sub_page (1U<<_GTF_sub_page)
+
+/*
+ * Subflags for GTF_accept_transfer:
+ * GTF_transfer_committed: Xen sets this flag to indicate that it is committed
+ * to transferring ownership of a page frame. When a guest sees this flag
+ * it must /not/ modify the grant entry until GTF_transfer_completed is
+ * set by Xen.
+ * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
+ * after reading GTF_transfer_committed. Xen will always write the frame
+ * address, followed by ORing this flag, in a timely manner.
+ */
+#define _GTF_transfer_committed (2)
+#define GTF_transfer_committed (1U<<_GTF_transfer_committed)
+#define _GTF_transfer_completed (3)
+#define GTF_transfer_completed (1U<<_GTF_transfer_completed)
+
+/*
+ * Version 2 grant table entries. These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+/*
+ * Version 1 and version 2 grant entries share a common prefix. The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
+ */
+struct grant_entry_header {
+ uint16_t flags;
+ domid_t domid;
+};
+typedef struct grant_entry_header grant_entry_header_t;
+
+/*
+ * Version 2 of the grant entry structure.
+ */
+union grant_entry_v2 {
+ grant_entry_header_t hdr;
+
+ /*
+ * This member is used for V1-style full page grants, where either:
+ *
+ * -- hdr.type is GTF_accept_transfer, or
+ * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
+ *
+ * In that case, the frame field has the same semantics as the
+ * field of the same name in the V1 entry structure.
+ */
+ struct {
+ grant_entry_header_t hdr;
+ uint32_t pad0;
+ uint64_t frame;
+ } full_page;
+
+ /*
+ * If the grant type is GTF_permit_access and GTF_sub_page is set,
+ * @domid is allowed to access bytes [@page_off,@page_off+@length)
+ * in frame @frame.
+ */
+ struct {
+ grant_entry_header_t hdr;
+ uint16_t page_off;
+ uint16_t length;
+ uint64_t frame;
+ } sub_page;
+
+ /*
+ * If the grant is GTF_transitive, @domid is allowed to use the
+ * grant @gref in domain @trans_domid, as if it was the local
+ * domain. Obviously, the transitive access must be compatible
+ * with the original grant.
+ *
+ * The current version of Xen does not allow transitive grants
+ * to be mapped.
+ */
+ struct {
+ grant_entry_header_t hdr;
+ domid_t trans_domid;
+ uint16_t pad0;
+ grant_ref_t gref;
+ } transitive;
+
+ uint32_t __spacer[4]; /* Pad to a power of two */
+};
+typedef union grant_entry_v2 grant_entry_v2_t;
+
+typedef uint16_t grant_status_t;
+
+#endif /* __XEN_INTERFACE_VERSION__ */
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
+ */
+
+/* ` enum neg_errnoval
+ * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd,
+ * ` void *args,
+ * ` unsigned int count)
+ * `
+ *
+ * @args points to an array of per-command data structures; the array
+ * has @count members.
+ */
+
+/* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */
+#define GNTTABOP_map_grant_ref 0
+#define GNTTABOP_unmap_grant_ref 1
+#define GNTTABOP_setup_table 2
+#define GNTTABOP_dump_table 3
+#define GNTTABOP_transfer 4
+#define GNTTABOP_copy 5
+#define GNTTABOP_query_size 6
+#define GNTTABOP_unmap_and_replace 7
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+#define GNTTABOP_set_version 8
+#define GNTTABOP_get_status_frames 9
+#define GNTTABOP_get_version 10
+#define GNTTABOP_swap_grant_ref 11
+#endif /* __XEN_INTERFACE_VERSION__ */
+/* ` } */
+
+/*
+ * Handle to track a mapping created via a grant reference.
+ */
+typedef uint32_t grant_handle_t;
+
+/*
+ * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
+ * by devices and/or host CPUs. If successful, <handle> is a tracking number
+ * that must be presented later to destroy the mapping(s). On error, <handle>
+ * is a negative status code.
+ * NOTES:
+ * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
+ * via which I/O devices may access the granted frame.
+ * 2. If GNTMAP_host_map is specified then a mapping will be added at
+ * either a host virtual address in the current address space, or at
+ * a PTE at the specified machine address. The type of mapping to
+ * perform is selected through the GNTMAP_contains_pte flag, and the
+ * address is specified in <host_addr>.
+ * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
+ * host mapping is destroyed by other means then it is *NOT* guaranteed
+ * to be accounted to the correct grant reference!
+ */
+struct gnttab_map_grant_ref {
+ /* IN parameters. */
+ uint64_t host_addr;
+ uint32_t flags; /* GNTMAP_* */
+ grant_ref_t ref;
+ domid_t dom;
+ /* OUT parameters. */
+ int16_t status; /* => enum grant_status */
+ grant_handle_t handle;
+ uint64_t dev_bus_addr;
+};
+typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
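+
+/*
+ * A minimal usage sketch, assuming a HYPERVISOR_grant_table_op(cmd,
+ * args, count) hypercall wrapper is available: map grant <ref> from
+ * domain <dom> at host virtual address <va>.
+ *
+ *	gnttab_map_grant_ref_t op;
+ *
+ *	op.host_addr = va;
+ *	op.flags = GNTMAP_host_map;
+ *	op.ref = ref;
+ *	op.dom = dom;
+ *	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) < 0
+ *	    || op.status != GNTST_okay)
+ *		... report failure ...
+ *	// keep op.handle for a later GNTTABOP_unmap_grant_ref
+ */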
+
+/*
+ * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
+ * field is ignored. If non-zero, they must refer to a device/host mapping
+ * that is tracked by <handle>.
+ * NOTES:
+ * 1. The call may fail in an undefined manner if either mapping is not
+ * tracked by <handle>.
+ * 2. After executing a batch of unmaps, it is guaranteed that no stale
+ * mappings will remain in the device or host TLBs.
+ */
+struct gnttab_unmap_grant_ref {
+ /* IN parameters. */
+ uint64_t host_addr;
+ uint64_t dev_bus_addr;
+ grant_handle_t handle;
+ /* OUT parameters. */
+ int16_t status; /* => enum grant_status */
+};
+typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
+
+/*
+ * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
+ * <nr_frames> pages. The frame addresses are written to the <frame_list>.
+ * Only <nr_frames> addresses are written, even if the table is larger.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ * 3. Xen may not support more than a single grant-table page per domain.
+ */
+struct gnttab_setup_table {
+ /* IN parameters. */
+ domid_t dom;
+ uint32_t nr_frames;
+ /* OUT parameters. */
+ int16_t status; /* => enum grant_status */
+#if __XEN_INTERFACE_VERSION__ < 0x00040300
+ XEN_GUEST_HANDLE(ulong) frame_list;
+#else
+ XEN_GUEST_HANDLE(xen_pfn_t) frame_list;
+#endif
+};
+typedef struct gnttab_setup_table gnttab_setup_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
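+
+/*
+ * Usage sketch: ask Xen for the machine frames backing this domain's
+ * own grant table so they can be mapped into kernel space. NFRAMES is
+ * hypothetical; set_xen_guest_handle() is the usual accessor for
+ * XEN_GUEST_HANDLE fields.
+ *
+ *	ulong frames[NFRAMES];
+ *	gnttab_setup_table_t setup;
+ *
+ *	setup.dom = DOMID_SELF;
+ *	setup.nr_frames = NFRAMES;
+ *	set_xen_guest_handle(setup.frame_list, frames);
+ *	HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
+ *	if (setup.status != GNTST_okay)
+ *		... report failure ...
+ */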
+
+/*
+ * GNTTABOP_dump_table: Dump the contents of the grant table to the
+ * xen console. Debugging use only.
+ */
+struct gnttab_dump_table {
+ /* IN parameters. */
+ domid_t dom;
+ /* OUT parameters. */
+ int16_t status; /* => enum grant_status */
+};
+typedef struct gnttab_dump_table gnttab_dump_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
+
+/*
+ * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
+ * foreign domain has previously registered its interest in the transfer via
+ * <domid, ref>.
+ *
+ * Note that, even if the transfer fails, the specified page no longer belongs
+ * to the calling domain *unless* the error is GNTST_bad_page.
+ */
+struct gnttab_transfer {
+ /* IN parameters. */
+ xen_pfn_t mfn;
+ domid_t domid;
+ grant_ref_t ref;
+ /* OUT parameters. */
+ int16_t status;
+};
+typedef struct gnttab_transfer gnttab_transfer_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
+
+
+/*
+ * GNTTABOP_copy: Hypervisor-based copy.
+ * Source and destination can be either MFNs or, for foreign domains,
+ * grant references; the foreign domain has to grant read/write access
+ * in its grant table.
+ *
+ * The flags specify whether the source and the destination are each
+ * an MFN or a grant reference.
+ *
+ * Note that this can also be used to copy data between two domains
+ * via a third party if the source and destination domains had previously
+ * granted appropriate access to their pages to the third party.
+ *
+ * source_offset specifies an offset in the source frame, dest_offset
+ * the offset in the target frame, and len specifies the number of
+ * bytes to be copied.
+ */
+
+#define _GNTCOPY_source_gref (0)
+#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref)
+#define _GNTCOPY_dest_gref (1)
+#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref)
+
+struct gnttab_copy {
+ /* IN parameters. */
+ struct {
+ union {
+ grant_ref_t ref;
+ xen_pfn_t gmfn;
+ } u;
+ domid_t domid;
+ uint16_t offset;
+ } source, dest;
+ uint16_t len;
+ uint16_t flags; /* GNTCOPY_* */
+ /* OUT parameters. */
+ int16_t status;
+};
+typedef struct gnttab_copy gnttab_copy_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
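+
+/*
+ * Usage sketch: have Xen copy <len> bytes out of a foreign grant into
+ * a local frame, so the grant never has to be mapped. ref, dom, mfn
+ * and len are this example's inputs.
+ *
+ *	gnttab_copy_t op;
+ *
+ *	op.source.u.ref = ref;		// foreign grant reference
+ *	op.source.domid = dom;
+ *	op.source.offset = 0;
+ *	op.dest.u.gmfn = mfn;		// local machine frame
+ *	op.dest.domid = DOMID_SELF;
+ *	op.dest.offset = 0;
+ *	op.len = len;	// offset+len must stay within one frame
+ *	op.flags = GNTCOPY_source_gref;
+ *	HYPERVISOR_grant_table_op(GNTTABOP_copy, &op, 1);
+ *	// op.status is GNTST_okay on success
+ */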
+
+/*
+ * GNTTABOP_query_size: Query the current and maximum sizes of the shared
+ * grant table.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+struct gnttab_query_size {
+ /* IN parameters. */
+ domid_t dom;
+ /* OUT parameters. */
+ uint32_t nr_frames;
+ uint32_t max_nr_frames;
+ int16_t status; /* => enum grant_status */
+};
+typedef struct gnttab_query_size gnttab_query_size_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
+
+/*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>. <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ * 1. The call may fail in an undefined manner if either mapping is not
+ * tracked by <handle>.
+ * 2. After executing a batch of unmaps, it is guaranteed that no stale
+ * mappings will remain in the device or host TLBs.
+ */
+struct gnttab_unmap_and_replace {
+ /* IN parameters. */
+ uint64_t host_addr;
+ uint64_t new_addr;
+ grant_handle_t handle;
+ /* OUT parameters. */
+ int16_t status; /* => enum grant_status */
+};
+typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
+
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure. This operation can only be performed
+ * once in any given domain. It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+struct gnttab_set_version {
+ /* IN/OUT parameters */
+ uint32_t version;
+};
+typedef struct gnttab_set_version gnttab_set_version_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t);
+
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpexch operations.
+ * <nr_frames> specifies the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+struct gnttab_get_status_frames {
+ /* IN parameters. */
+ uint32_t nr_frames;
+ domid_t dom;
+ /* OUT parameters. */
+ int16_t status; /* => enum grant_status */
+ XEN_GUEST_HANDLE(uint64_t) frame_list;
+};
+typedef struct gnttab_get_status_frames gnttab_get_status_frames_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t);
+
+/*
+ * GNTTABOP_get_version: Get the grant table version which is in
+ * effect for domain <dom>.
+ */
+struct gnttab_get_version {
+ /* IN parameters */
+ domid_t dom;
+ uint16_t pad;
+ /* OUT parameters */
+ uint32_t version;
+};
+typedef struct gnttab_get_version gnttab_get_version_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t);
+
+/*
+ * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries.
+ */
+struct gnttab_swap_grant_ref {
+ /* IN parameters */
+ grant_ref_t ref_a;
+ grant_ref_t ref_b;
+ /* OUT parameters */
+ int16_t status; /* => enum grant_status */
+};
+typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t);
+
+#endif /* __XEN_INTERFACE_VERSION__ */
+
+/*
+ * Bitfield values for gnttab_map_grant_ref.flags.
+ */
+ /* Map the grant entry for access by I/O devices. */
+#define _GNTMAP_device_map (0)
+#define GNTMAP_device_map (1<<_GNTMAP_device_map)
+ /* Map the grant entry for access by host CPUs. */
+#define _GNTMAP_host_map (1)
+#define GNTMAP_host_map (1<<_GNTMAP_host_map)
+ /* Accesses to the granted frame will be restricted to read-only access. */
+#define _GNTMAP_readonly (2)
+#define GNTMAP_readonly (1<<_GNTMAP_readonly)
+ /*
+ * GNTMAP_host_map subflag:
+ * 0 => The host mapping is usable only by the guest OS.
+ * 1 => The host mapping is usable by guest OS + current application.
+ */
+#define _GNTMAP_application_map (3)
+#define GNTMAP_application_map (1<<_GNTMAP_application_map)
+
+ /*
+ * GNTMAP_contains_pte subflag:
+ * 0 => This map request contains a host virtual address.
+ * 1 => This map request contains the machine address of the PTE to update.
+ */
+#define _GNTMAP_contains_pte (4)
+#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte)
+
+#define _GNTMAP_can_fail (5)
+#define GNTMAP_can_fail (1<<_GNTMAP_can_fail)
+
+/*
+ * Bits to be placed in guest kernel available PTE bits (architecture
+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
+ */
+#define _GNTMAP_guest_avail0 (16)
+#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0)
+
+/*
+ * Values for error status returns. All errors are -ve.
+ */
+/* ` enum grant_status { */
+#define GNTST_okay (0) /* Normal return. */
+#define GNTST_general_error (-1) /* General undefined error. */
+#define GNTST_bad_domain (-2) /* Unrecognised domain id. */
+#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */
+#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */
+#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */
+#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/
+#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */
+#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */
+#define GNTST_bad_page (-9) /* Specified page was invalid for op. */
+#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */
+#define GNTST_address_too_big (-11) /* transfer page address too large. */
+#define GNTST_eagain (-12) /* Operation not done; try again. */
+/* ` } */
+
+#define GNTTABOP_error_msgs { \
+ "okay", \
+ "undefined error", \
+ "unrecognised domain id", \
+ "invalid grant reference", \
+ "invalid mapping handle", \
+ "invalid virtual address", \
+ "invalid device address", \
+ "no spare translation slot in the I/O MMU", \
+ "permission denied", \
+ "bad page", \
+ "copy arguments cross page boundary", \
+ "page address size too large", \
+ "operation not done; try again" \
+}
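+
+/*
+ * Sketch of the intended use of the table above: since all errors are
+ * negative and dense, index by the negated status after a range check
+ * (nelem() here stands for any array-count macro).
+ *
+ *	static char *gntmsg[] = GNTTABOP_error_msgs;
+ *
+ *	char*
+ *	gntstrerror(int st)
+ *	{
+ *		if (st > 0 || -st >= nelem(gntmsg))
+ *			return "unknown grant status";
+ *		return gntmsg[-st];
+ *	}
+ */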
+
+#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/e820.h
@@ -1,0 +1,34 @@
+
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_E820_H__
+#define __XEN_PUBLIC_HVM_E820_H__
+
+/* E820 location in HVM virtual address space. */
+#define HVM_E820_PAGE 0x00090000
+#define HVM_E820_NR_OFFSET 0x000001E8
+#define HVM_E820_OFFSET 0x000002D0
+
+#define HVM_BELOW_4G_RAM_END 0xF0000000
+#define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END
+#define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START)
+
+#endif /* __XEN_PUBLIC_HVM_E820_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/hvm_info_table.h
@@ -1,0 +1,72 @@
+/******************************************************************************
+ * hvm/hvm_info_table.h
+ *
+ * HVM parameter and information table, written into guest memory map.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+
+#define HVM_INFO_PFN 0x09F
+#define HVM_INFO_OFFSET 0x800
+#define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET)
+
+/* Maximum we can support with current vLAPIC ID mapping. */
+#define HVM_MAX_VCPUS 128
+
+struct hvm_info_table {
+ char signature[8]; /* "HVM INFO" */
+ uint32_t length;
+ uint8_t checksum;
+
+ /* Should firmware build APIC descriptors (APIC MADT / MP BIOS)? */
+ uint8_t apic_mode;
+
+ /* How many CPUs does this domain have? */
+ uint32_t nr_vcpus;
+
+ /*
+ * MEMORY MAP provided by HVM domain builder.
+ * Notes:
+ * 1. page_to_phys(x) = x << 12
+ * 2. If a field is zero, the corresponding range does not exist.
+ */
+ /*
+ * 0x0 to page_to_phys(low_mem_pgend)-1:
+ * RAM below 4GB (except for VGA hole 0xA0000-0xBFFFF)
+ */
+ uint32_t low_mem_pgend;
+ /*
+ * page_to_phys(reserved_mem_pgstart) to 0xFFFFFFFF:
+ * Reserved for special memory mappings
+ */
+ uint32_t reserved_mem_pgstart;
+ /*
+ * 0x100000000 to page_to_phys(high_mem_pgend)-1:
+ * RAM above 4GB
+ */
+ uint32_t high_mem_pgend;
+
+ /* Bitmap of which CPUs are online at boot time. */
+ uint8_t vcpu_online[(HVM_MAX_VCPUS + 7)/8];
+};
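+
+/*
+ * The checksum convention is assumed to match hvmloader's: the first
+ * <length> bytes of the table, the checksum field included, sum to
+ * zero modulo 256. A consumer might validate the table like this
+ * (t is a struct hvm_info_table*):
+ *
+ *	uint8_t *p = (uint8_t *)t;
+ *	uint8_t sum = 0;
+ *	uint32_t i;
+ *
+ *	for (i = 0; i < t->length; i++)
+ *		sum += p[i];
+ *	if (sum != 0 || memcmp(t->signature, "HVM INFO", 8) != 0)
+ *		... table invalid ...
+ */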
+
+#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/hvm_op.h
@@ -1,0 +1,275 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
+#define __XEN_PUBLIC_HVM_HVM_OP_H__
+
+#include "../xen.h"
+#include "../trace.h"
+
+/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */
+#define HVMOP_set_param 0
+#define HVMOP_get_param 1
+struct xen_hvm_param {
+ domid_t domid; /* IN */
+ uint32_t index; /* IN */
+ uint64_t value; /* IN/OUT */
+};
+typedef struct xen_hvm_param xen_hvm_param_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t);
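+
+/*
+ * Usage sketch, assuming a HYPERVISOR_hvm_op(cmd, arg) hypercall
+ * wrapper: read the xenstore ring's pfn for the current domain
+ * (HVM_PARAM_STORE_PFN is defined in params.h).
+ *
+ *	xen_hvm_param_t p;
+ *
+ *	p.domid = DOMID_SELF;
+ *	p.index = HVM_PARAM_STORE_PFN;
+ *	if (HYPERVISOR_hvm_op(HVMOP_get_param, &p) == 0)
+ *		storepfn = p.value;	// storepfn: caller's variable
+ */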
+
+/* Set the logical level of one of a domain's PCI INTx wires. */
+#define HVMOP_set_pci_intx_level 2
+struct xen_hvm_set_pci_intx_level {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* PCI INTx identification in PCI topology (domain:bus:device:intx). */
+ uint8_t domain, bus, device, intx;
+ /* Assertion level (0 = unasserted, 1 = asserted). */
+ uint8_t level;
+};
+typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t);
+
+/* Set the logical level of one of a domain's ISA IRQ wires. */
+#define HVMOP_set_isa_irq_level 3
+struct xen_hvm_set_isa_irq_level {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* ISA device identification, by ISA IRQ (0-15). */
+ uint8_t isa_irq;
+ /* Assertion level (0 = unasserted, 1 = asserted). */
+ uint8_t level;
+};
+typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t);
+
+#define HVMOP_set_pci_link_route 4
+struct xen_hvm_set_pci_link_route {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* PCI link identifier (0-3). */
+ uint8_t link;
+ /* ISA IRQ (1-15), or 0 (disable link). */
+ uint8_t isa_irq;
+};
+typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t);
+
+/* Flushes all VCPU TLBs: @arg must be NULL. */
+#define HVMOP_flush_tlbs 5
+
+typedef enum {
+ HVMMEM_ram_rw, /* Normal read/write guest RAM */
+ HVMMEM_ram_ro, /* Read-only; writes are discarded */
+ HVMMEM_mmio_dm, /* Reads and writes go to the device model */
+} hvmmem_type_t;
+
+/* Following tools-only interfaces may change in future. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+/* Track dirty VRAM. */
+#define HVMOP_track_dirty_vram 6
+struct xen_hvm_track_dirty_vram {
+ /* Domain to be tracked. */
+ domid_t domid;
+ /* First pfn to track. */
+ uint64_aligned_t first_pfn;
+ /* Number of pages to track. */
+ uint64_aligned_t nr;
+ /* OUT variable. */
+ /* Dirty bitmap buffer. */
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap;
+};
+typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t);
+
+/* Notify that some pages got modified by the Device Model. */
+#define HVMOP_modified_memory 7
+struct xen_hvm_modified_memory {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* First pfn. */
+ uint64_aligned_t first_pfn;
+ /* Number of pages. */
+ uint64_aligned_t nr;
+};
+typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t);
+
+#define HVMOP_set_mem_type 8
+/* Notify that a region of memory is to be treated in a specific way. */
+struct xen_hvm_set_mem_type {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* Memory type */
+ uint16_t hvmmem_type;
+ /* Number of pages. */
+ uint32_t nr;
+ /* First pfn. */
+ uint64_aligned_t first_pfn;
+};
+typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+/* Hint from PV drivers for pagetable destruction. */
+#define HVMOP_pagetable_dying 9
+struct xen_hvm_pagetable_dying {
+ /* Domain with a pagetable about to be destroyed. */
+ domid_t domid;
+ uint16_t pad[3]; /* align next field on 8-byte boundary */
+ /* guest physical address of the toplevel pagetable dying */
+ uint64_t gpa;
+};
+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t);
+
+/* Get the current Xen time, in nanoseconds since system boot. */
+#define HVMOP_get_time 10
+struct xen_hvm_get_time {
+ uint64_t now; /* OUT */
+};
+typedef struct xen_hvm_get_time xen_hvm_get_time_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_time_t);
+
+#define HVMOP_xentrace 11
+struct xen_hvm_xentrace {
+ uint16_t event, extra_bytes;
+ uint8_t extra[TRACE_EXTRA_MAX * sizeof(uint32_t)];
+};
+typedef struct xen_hvm_xentrace xen_hvm_xentrace_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t);
+
+/* Following tools-only interfaces may change in future. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+#define HVMOP_set_mem_access 12
+typedef enum {
+ HVMMEM_access_n,
+ HVMMEM_access_r,
+ HVMMEM_access_w,
+ HVMMEM_access_rw,
+ HVMMEM_access_x,
+ HVMMEM_access_rx,
+ HVMMEM_access_wx,
+ HVMMEM_access_rwx,
+ HVMMEM_access_rx2rw, /* Page starts off as r-x, but automatically
+ * changes to r-w on a write */
+ HVMMEM_access_n2rwx, /* Log access: starts off as n, automatically
+ * goes to rwx, generating an event without
+ * pausing the vcpu */
+ HVMMEM_access_default /* Take the domain default */
+} hvmmem_access_t;
+/* Notify that a region of memory is to have specific access types */
+struct xen_hvm_set_mem_access {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* Memory type */
+ uint16_t hvmmem_access; /* hvm_access_t */
+ /* Number of pages, ignored on setting default access */
+ uint32_t nr;
+ /* First pfn, or ~0ull to set the default access for new pages */
+ uint64_aligned_t first_pfn;
+};
+typedef struct xen_hvm_set_mem_access xen_hvm_set_mem_access_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_access_t);
+
+#define HVMOP_get_mem_access 13
+/* Get the specific access type for that region of memory */
+struct xen_hvm_get_mem_access {
+ /* Domain to be queried. */
+ domid_t domid;
+ /* Memory type: OUT */
+ uint16_t hvmmem_access; /* hvm_access_t */
+ /* pfn, or ~0ull for default access for new pages. IN */
+ uint64_aligned_t pfn;
+};
+typedef struct xen_hvm_get_mem_access xen_hvm_get_mem_access_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_access_t);
+
+#define HVMOP_inject_trap 14
+/* Inject a trap into a VCPU; it will be taken the next time the VCPU
+ * is scheduled. Note that the caller should know enough of the
+ * state of the CPU before injecting, to know what the effect of
+ * injecting the trap will be.
+ */
+struct xen_hvm_inject_trap {
+ /* Domain to be queried. */
+ domid_t domid;
+ /* VCPU */
+ uint32_t vcpuid;
+ /* Vector number */
+ uint32_t vector;
+ /* Trap type (HVMOP_TRAP_*) */
+ uint32_t type;
+/* NB. This enumeration precisely matches hvm.h:X86_EVENTTYPE_* */
+# define HVMOP_TRAP_ext_int 0 /* external interrupt */
+# define HVMOP_TRAP_nmi 2 /* nmi */
+# define HVMOP_TRAP_hw_exc 3 /* hardware exception */
+# define HVMOP_TRAP_sw_int 4 /* software interrupt (CD nn) */
+# define HVMOP_TRAP_pri_sw_exc 5 /* ICEBP (F1) */
+# define HVMOP_TRAP_sw_exc 6 /* INT3 (CC), INTO (CE) */
+ /* Error code, or ~0u to skip */
+ uint32_t error_code;
+ /* Instruction length */
+ uint32_t insn_len;
+ /* CR2 for page faults */
+ uint64_aligned_t cr2;
+};
+typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#define HVMOP_get_mem_type 15
+/* Return hvmmem_type_t for the specified pfn. */
+struct xen_hvm_get_mem_type {
+ /* Domain to be queried. */
+ domid_t domid;
+ /* OUT variable. */
+ uint16_t mem_type;
+ uint16_t pad[2]; /* align next field on 8-byte boundary */
+ /* IN variable. */
+ uint64_t pfn;
+};
+typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t);
+
+/* Following tools-only interfaces may change in future. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+/* MSI injection for emulated devices */
+#define HVMOP_inject_msi 16
+struct xen_hvm_inject_msi {
+ /* Domain to be injected */
+ domid_t domid;
+ /* Data -- lower 32 bits */
+ uint32_t data;
+ /* Address (0xfeexxxxx) */
+ uint64_t addr;
+};
+typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/hvm_xs_strings.h
@@ -1,0 +1,80 @@
+/******************************************************************************
+ * hvm/hvm_xs_strings.h
+ *
+ * HVM xenstore strings used in HVMLOADER.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__
+#define __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__
+
+#define HVM_XS_HVMLOADER "hvmloader"
+#define HVM_XS_BIOS "hvmloader/bios"
+#define HVM_XS_GENERATION_ID_ADDRESS "hvmloader/generation-id-address"
+#define HVM_XS_ALLOW_MEMORY_RELOCATE "hvmloader/allow-memory-relocate"
+
+/* The following values allow additional ACPI tables to be added to the
+ * virtual ACPI BIOS that hvmloader constructs. The values specify the guest
+ * physical address and length of a block of ACPI tables to add. The format of
+ * the block is simply concatenated raw tables (which specify their own length
+ * in the ACPI header).
+ */
+#define HVM_XS_ACPI_PT_ADDRESS "hvmloader/acpi/address"
+#define HVM_XS_ACPI_PT_LENGTH "hvmloader/acpi/length"
+
+/* Any number of SMBIOS types can be passed through to an HVM guest using
+ * the following xenstore values. The values specify the guest physical
+ * address and length of a block of SMBIOS structures for hvmloader to use.
+ * The block is formatted in the following way:
+ *
+ * <length><struct><length><struct>...
+ *
+ * Each length separator is a 32-bit integer indicating the length of the
+ * next SMBIOS structure. For DMTF-defined types (0 - 121), the passed-in
+ * struct will replace the default structure in hvmloader. In addition,
+ * any OEM/vendor types (128 - 255) will all be added.
+ */
+#define HVM_XS_SMBIOS_PT_ADDRESS "hvmloader/smbios/address"
+#define HVM_XS_SMBIOS_PT_LENGTH "hvmloader/smbios/length"
+
+/* Set to 1 to enable SMBIOS default portable battery (type 22) values. */
+#define HVM_XS_SMBIOS_DEFAULT_BATTERY "hvmloader/smbios/default_battery"
+
+/* The following xenstore values are used to override some of the default
+ * string values in the SMBIOS table constructed in hvmloader.
+ */
+#define HVM_XS_BIOS_STRINGS "bios-strings"
+#define HVM_XS_BIOS_VENDOR "bios-strings/bios-vendor"
+#define HVM_XS_BIOS_VERSION "bios-strings/bios-version"
+#define HVM_XS_SYSTEM_MANUFACTURER "bios-strings/system-manufacturer"
+#define HVM_XS_SYSTEM_PRODUCT_NAME "bios-strings/system-product-name"
+#define HVM_XS_SYSTEM_VERSION "bios-strings/system-version"
+#define HVM_XS_SYSTEM_SERIAL_NUMBER "bios-strings/system-serial-number"
+#define HVM_XS_ENCLOSURE_MANUFACTURER "bios-strings/enclosure-manufacturer"
+#define HVM_XS_ENCLOSURE_SERIAL_NUMBER "bios-strings/enclosure-serial-number"
+#define HVM_XS_BATTERY_MANUFACTURER "bios-strings/battery-manufacturer"
+#define HVM_XS_BATTERY_DEVICE_NAME "bios-strings/battery-device-name"
+
+/* 1 to 99 OEM strings can be set in xenstore using values of the form
+ * below. These strings will be loaded into the SMBIOS type 11 structure.
+ */
+#define HVM_XS_OEM_STRINGS "bios-strings/oem-%d"
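+
+/*
+ * For example, a tool might generate the key for OEM string <n>
+ * (1 <= n <= 99) with
+ *
+ *	snprintf(key, sizeof key, HVM_XS_OEM_STRINGS, n);
+ *
+ * yielding "bios-strings/oem-1" and so on.
+ */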
+
+#endif /* __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/ioreq.h
@@ -1,0 +1,122 @@
+/*
+ * ioreq.h: I/O request definitions for device models
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _IOREQ_H_
+#define _IOREQ_H_
+
+#define IOREQ_READ 1
+#define IOREQ_WRITE 0
+
+#define STATE_IOREQ_NONE 0
+#define STATE_IOREQ_READY 1
+#define STATE_IOREQ_INPROCESS 2
+#define STATE_IORESP_READY 3
+
+#define IOREQ_TYPE_PIO 0 /* pio */
+#define IOREQ_TYPE_COPY 1 /* mmio ops */
+#define IOREQ_TYPE_TIMEOFFSET 7
+#define IOREQ_TYPE_INVALIDATE 8 /* mapcache */
+
+/*
+ * VMExit dispatcher should cooperate with instruction decoder to
+ * prepare this structure and notify service OS and DM by sending
+ * virq
+ */
+struct ioreq {
+ uint64_t addr; /* physical address */
+ uint64_t data; /* data (or paddr of data) */
+ uint32_t count; /* for rep prefixes */
+ uint32_t size; /* size in bytes */
+ uint32_t vp_eport; /* evtchn for notifications to/from device model */
+ uint16_t _pad0;
+ uint8_t state:4;
+ uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr
+ * of the real data to use. */
+ uint8_t dir:1; /* 1=read, 0=write */
+ uint8_t df:1;
+ uint8_t _pad1:1;
+ uint8_t type; /* I/O type */
+};
+typedef struct ioreq ioreq_t;
+
+struct shared_iopage {
+ struct ioreq vcpu_ioreq[1];
+};
+typedef struct shared_iopage shared_iopage_t;
+
+struct buf_ioreq {
+ uint8_t type; /* I/O type */
+ uint8_t pad:1;
+ uint8_t dir:1; /* 1=read, 0=write */
+ uint8_t size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */
+ uint32_t addr:20;/* physical address */
+ uint32_t data; /* data */
+};
+typedef struct buf_ioreq buf_ioreq_t;
+
+#define IOREQ_BUFFER_SLOT_NUM 511 /* 8 bytes each, plus 2 4-byte indexes */
+struct buffered_iopage {
+ unsigned int read_pointer;
+ unsigned int write_pointer;
+ buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM];
+}; /* NB. Size of this structure must be no greater than one page. */
+typedef struct buffered_iopage buffered_iopage_t;
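+
+/*
+ * Consumption sketch for the device-model side, assuming the usual
+ * semantics (as in qemu): the pointers only ever increase and are
+ * reduced modulo IOREQ_BUFFER_SLOT_NUM when indexing; a size-3
+ * (8-byte) request occupies two consecutive slots.
+ *
+ *	while (pg->read_pointer != pg->write_pointer) {
+ *		buf_ioreq_t *b =
+ *		    &pg->buf_ioreq[pg->read_pointer % IOREQ_BUFFER_SLOT_NUM];
+ *		handle_buf_ioreq(b);	// hypothetical handler
+ *		mb();	// consume the slot before releasing it
+ *		pg->read_pointer++;
+ *	}
+ */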
+
+/*
+ * ACPI Control/Event register locations. Location is controlled by a
+ * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION.
+ */
+
+/* Version 0 (default): Traditional Xen locations. */
+#define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40
+#define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x04)
+#define ACPI_PM_TMR_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x08)
+#define ACPI_GPE0_BLK_ADDRESS_V0 (ACPI_PM_TMR_BLK_ADDRESS_V0 + 0x20)
+#define ACPI_GPE0_BLK_LEN_V0 0x08
+
+/* Version 1: Locations preferred by modern Qemu. */
+#define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000
+#define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x04)
+#define ACPI_PM_TMR_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x08)
+#define ACPI_GPE0_BLK_ADDRESS_V1 0xafe0
+#define ACPI_GPE0_BLK_LEN_V1 0x04
+
+/* Compatibility definitions for the default location (version 0). */
+#define ACPI_PM1A_EVT_BLK_ADDRESS ACPI_PM1A_EVT_BLK_ADDRESS_V0
+#define ACPI_PM1A_CNT_BLK_ADDRESS ACPI_PM1A_CNT_BLK_ADDRESS_V0
+#define ACPI_PM_TMR_BLK_ADDRESS ACPI_PM_TMR_BLK_ADDRESS_V0
+#define ACPI_GPE0_BLK_ADDRESS ACPI_GPE0_BLK_ADDRESS_V0
+#define ACPI_GPE0_BLK_LEN ACPI_GPE0_BLK_LEN_V0
+
+
+#endif /* _IOREQ_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/params.h
@@ -1,0 +1,150 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
+#define __XEN_PUBLIC_HVM_PARAMS_H__
+
+#include "hvm_op.h"
+
+/*
+ * Parameter space for HVMOP_{set,get}_param.
+ */
+
+/*
+ * How should CPU0 event-channel notifications be delivered?
+ * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt).
+ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows:
+ * Domain = val[47:32], Bus = val[31:16],
+ * DevFn = val[15: 8], IntX = val[ 1: 0]
+ * val[63:56] == 2: val[7:0] is a vector number, check for
+ * XENFEAT_hvm_callback_vector to know if this delivery
+ * method is available.
+ * If val == 0 then CPU0 event-channel notifications are not delivered.
+ */
+#define HVM_PARAM_CALLBACK_IRQ 0
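+
+/*
+ * For example, following the encodings documented above: GSI delivery
+ * is simply val = gsi, while vector delivery would be
+ *
+ *	val = (2ULL << 56) | vector;
+ *
+ * the latter only when XENFEAT_hvm_callback_vector is advertised.
+ */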
+
+/*
+ * These are not used by Xen. They are here for convenience of HVM-guest
+ * xenbus implementations.
+ */
+#define HVM_PARAM_STORE_PFN 1
+#define HVM_PARAM_STORE_EVTCHN 2
+
+#define HVM_PARAM_PAE_ENABLED 4
+
+#define HVM_PARAM_IOREQ_PFN 5
+
+#define HVM_PARAM_BUFIOREQ_PFN 6
+#define HVM_PARAM_BUFIOREQ_EVTCHN 26
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/* Expose Viridian interfaces to this HVM guest? */
+#define HVM_PARAM_VIRIDIAN 9
+
+#endif
+
+/*
+ * Set mode for virtual timers (currently x86 only):
+ * delay_for_missed_ticks (default):
+ * Do not advance a vcpu's time beyond the correct delivery time for
+ * interrupts that have been missed due to preemption. Deliver missed
+ * interrupts when the vcpu is rescheduled and advance the vcpu's virtual
+ * time stepwise for each one.
+ * no_delay_for_missed_ticks:
+ * As above, missed interrupts are delivered, but guest time always tracks
+ * wallclock (i.e., real) time while doing so.
+ * no_missed_ticks_pending:
+ * No missed interrupts are held pending. Instead, to ensure ticks are
+ * delivered at some non-zero rate, if we detect missed ticks then the
+ * internal tick alarm is not disabled if the VCPU is preempted during the
+ * next tick period.
+ * one_missed_tick_pending:
+ * Missed interrupts are collapsed together and delivered as one 'late tick'.
+ * Guest time always tracks wallclock (i.e., real) time.
+ */
+#define HVM_PARAM_TIMER_MODE 10
+#define HVMPTM_delay_for_missed_ticks 0
+#define HVMPTM_no_delay_for_missed_ticks 1
+#define HVMPTM_no_missed_ticks_pending 2
+#define HVMPTM_one_missed_tick_pending 3
+
+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
+#define HVM_PARAM_HPET_ENABLED 11
+
+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
+#define HVM_PARAM_IDENT_PT 12
+
+/* Device Model domain, defaults to 0. */
+#define HVM_PARAM_DM_DOMAIN 13
+
+/* ACPI S state: currently support S0 and S3 on x86. */
+#define HVM_PARAM_ACPI_S_STATE 14
+
+/* TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS 15
+
+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
+#define HVM_PARAM_VPT_ALIGN 16
+
+/* Console debug shared memory ring and event channel */
+#define HVM_PARAM_CONSOLE_PFN 17
+#define HVM_PARAM_CONSOLE_EVTCHN 18
+
+/*
+ * Select location of ACPI PM1a and TMR control blocks. Currently two locations
+ * are supported, specified by version 0 or 1 in this parameter:
+ * - 0: default, use the old addresses
+ * PM1A_EVT == 0x1f40; PM1A_CNT == 0x1f44; PM_TMR == 0x1f48
+ * - 1: use the new default qemu addresses
+ * PM1A_EVT == 0xb000; PM1A_CNT == 0xb004; PM_TMR == 0xb008
+ * You can find these address definitions in <hvm/ioreq.h>
+ */
+#define HVM_PARAM_ACPI_IOPORTS_LOCATION 19
+
+/* Enable blocking memory events, async or sync (pause vcpu until
+ * response); onchangeonly indicates messages only on a change of value */
+#define HVM_PARAM_MEMORY_EVENT_CR0 20
+#define HVM_PARAM_MEMORY_EVENT_CR3 21
+#define HVM_PARAM_MEMORY_EVENT_CR4 22
+#define HVM_PARAM_MEMORY_EVENT_INT3 23
+#define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP 25
+#define HVM_PARAM_MEMORY_EVENT_MSR 30
+
+#define HVMPME_MODE_MASK (3 << 0)
+#define HVMPME_mode_disabled 0
+#define HVMPME_mode_async 1
+#define HVMPME_mode_sync 2
+#define HVMPME_onchangeonly (1 << 2)
+
+/* Boolean: Enable nestedhvm (hvm only) */
+#define HVM_PARAM_NESTEDHVM 24
+
+/* Params for the mem event rings */
+#define HVM_PARAM_PAGING_RING_PFN 27
+#define HVM_PARAM_ACCESS_RING_PFN 28
+#define HVM_PARAM_SHARING_RING_PFN 29
+
+/* SHUTDOWN_* action in case of a triple fault */
+#define HVM_PARAM_TRIPLE_FAULT_REASON 31
+
+#define HVM_NR_PARAMS 32
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/pvdrivers.h
@@ -1,0 +1,47 @@
+/*
+ * pvdrivers.h: Register of PV drivers product numbers.
+ * Copyright (c) 2012, Citrix Systems Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _XEN_PUBLIC_PVDRIVERS_H_
+#define _XEN_PUBLIC_PVDRIVERS_H_
+
+/*
+ * This is the master registry of product numbers for
+ * PV drivers.
+ * If you need a new product number allocating, please
+ * post to [email protected]. You should NOT use
+ * a product number without allocating one.
+ * If you maintain a separate versioning and distribution path
+ * for PV drivers you should have a separate product number so
+ * that your drivers can be separated from others.
+ *
+ * During development, you may use the product ID to
+ * indicate a driver which is yet to be released.
+ */
+
+#define PVDRIVERS_PRODUCT_LIST(EACH) \
+ EACH("xensource-windows", 0x0001) /* Citrix */ \
+ EACH("gplpv-windows", 0x0002) /* James Harper */ \
+ EACH("linux", 0x0003) \
+ EACH("experimental", 0xffff)
+
+#endif /* _XEN_PUBLIC_PVDRIVERS_H_ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/save.h
@@ -1,0 +1,111 @@
+/*
+ * hvm/save.h
+ *
+ * Structure definitions for HVM state that is held by Xen and must
+ * be saved along with the domain's memory and device-model state.
+ *
+ * Copyright (c) 2007 XenSource Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_SAVE_H__
+#define __XEN_PUBLIC_HVM_SAVE_H__
+
+/*
+ * Structures in this header *must* have the same layout in 32bit
+ * and 64bit environments: this means that all fields must be explicitly
+ * sized types and aligned to their sizes, and the structs must be
+ * a multiple of eight bytes long.
+ *
+ * Only the state necessary for saving and restoring (i.e. fields
+ * that are analogous to actual hardware state) should go in this file.
+ * Internal mechanisms should be kept in Xen-private headers.
+ */
+
+#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
+#error "Anonymous structs/unions are a GNU extension."
+#endif
+
+/*
+ * Each entry is preceded by a descriptor giving its type and length
+ */
+struct hvm_save_descriptor {
+ uint16_t typecode; /* Used to demux the various types below */
+ uint16_t instance; /* Further demux within a type */
+ uint32_t length; /* In bytes, *not* including this descriptor */
+};
+
+
+/*
+ * Each entry has a datatype associated with it: for example, the CPU state
+ * is saved as a HVM_SAVE_TYPE(CPU), which has HVM_SAVE_LENGTH(CPU),
+ * and is identified by a descriptor with typecode HVM_SAVE_CODE(CPU).
+ * DECLARE_HVM_SAVE_TYPE binds these things together with some type-system
+ * ugliness.
+ */
+
+#ifdef __XEN__
+# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \
+ static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { return _fix(h); } \
+ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}; \
+ struct __HVM_SAVE_TYPE_COMPAT_##_x { _ctype t; }
+
+# include <xen/lib.h> /* BUG() */
+# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \
+ static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { BUG(); return -1; } \
+ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}; \
+ struct __HVM_SAVE_TYPE_COMPAT_##_x { _type t; }
+#else
+# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \
+ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}
+
+# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \
+ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}
+#endif
+
+#define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t)
+#define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x)))
+#define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c))
+
+#ifdef __XEN__
+# define HVM_SAVE_TYPE_COMPAT(_x) typeof (((struct __HVM_SAVE_TYPE_COMPAT_##_x *)(0))->t)
+# define HVM_SAVE_LENGTH_COMPAT(_x) (sizeof (HVM_SAVE_TYPE_COMPAT(_x)))
+
+# define HVM_SAVE_HAS_COMPAT(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->cpt)-1)
+# define HVM_SAVE_FIX_COMPAT(_x, _dst) __HVM_SAVE_FIX_COMPAT_##_x(_dst)
+#endif
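+
+/*
+ * Illustrative use of the machinery above, with a hypothetical record:
+ *
+ *	struct hvm_hw_foo { uint64_t bar; };
+ *	DECLARE_HVM_SAVE_TYPE(FOO, 3, struct hvm_hw_foo);
+ *
+ * After this, HVM_SAVE_CODE(FOO) evaluates to 3 (the size of the char
+ * array c), HVM_SAVE_LENGTH(FOO) to sizeof(struct hvm_hw_foo), and
+ * HVM_SAVE_TYPE(FOO) names the record type itself.
+ */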
+
+/*
+ * The series of save records is terminated by a zero-type, zero-length
+ * descriptor.
+ */
+
+struct hvm_save_end {};
+DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end);
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../arch-x86/hvm/save.h"
+#elif defined(__arm__) || defined(__aarch64__)
+#include "../arch-arm/hvm/save.h"
+#else
+#error "unsupported architecture"
+#endif
+
+#endif /* __XEN_PUBLIC_HVM_SAVE_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/blkif.h
@@ -1,0 +1,556 @@
+/******************************************************************************
+ * blkif.h
+ *
+ * Unified block-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ * Copyright (c) 2012, Spectra Logic Corporation
+ */
+
+#ifndef __XEN_PUBLIC_IO_BLKIF_H__
+#define __XEN_PUBLIC_IO_BLKIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Front->back notifications: When enqueuing a new request, sending a
+ * notification can be made conditional on req_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Backends must set
+ * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
+ *
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
+
+#ifndef blkif_vdev_t
+#define blkif_vdev_t uint16_t
+#endif
+#define blkif_sector_t uint64_t
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen block driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters. This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * All data in the XenStore is stored as strings. Nodes specifying numeric
+ * values are encoded in decimal. Integer value ranges listed below are
+ * expressed as fixed-size integer types capable of storing the conversion
+ * of a properly formatted node string, without loss of information.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ * XenStore nodes marked "DEPRECATED" in their notes section should only be
+ * used to provide interoperability with legacy implementations.
+ *
+ * See the XenBus state transition diagram below for details on when XenBus
+ * nodes must be published and when they can be queried.
+ *
+ *****************************************************************************
+ * Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * mode
+ * Values: "r" (read only), "w" (writable)
+ *
+ * The read or write access permissions to the backing store to be
+ * granted to the frontend.
+ *
+ * params
+ * Values: string
+ *
+ * A freely formatted string providing sufficient information for the
+ * backend driver to open the backing device. (e.g. the path to the
+ * file or block device representing the backing store.)
+ *
+ * type
+ * Values: "file", "phy", "tap"
+ *
+ * The type of the backing device/object.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-barrier
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the backend can process requests
+ * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests
+ * of this type may still be returned at any time with the
+ * BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-flush-cache
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the backend can process requests
+ * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests
+ * of this type may still be returned at any time with the
+ * BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-discard
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the backend can process requests
+ * containing the BLKIF_OP_DISCARD request opcode. Requests
+ * of this type may still be returned at any time with the
+ * BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-persistent
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ * Notes: 7
+ *
+ * A value of "1" indicates that the backend can keep the grants used
+ * by the frontend driver mapped, so the same set of grants should be
+ * used in all transactions. The maximum number of grants the backend
+ * can map persistently depends on the implementation, but ideally it
+ * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
+ * feature the backend doesn't need to unmap each grant, preventing
+ * costly TLB flushes. The backend driver should only map grants
+ * persistently if the frontend supports it. If a backend driver chooses
+ * to use the persistent protocol when the frontend doesn't support it,
+ * it will probably hit the maximum number of persistently mapped grants
+ * (due to the fact that the frontend won't be reusing the same grants),
+ * and fall back to non-persistent mode. Backend implementations may
+ * shrink or expand the number of persistently mapped grants without
+ * notifying the frontend depending on memory constraints (this might
+ * cause a performance degradation).
+ *
+ * If a backend driver wants to limit the maximum number of persistently
+ * mapped grants to a value less than RING_SIZE *
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST, an LRU strategy should be used to
+ * discard the grants that are less commonly used. Using an LRU in the
+ * backend driver paired with a LIFO queue in the frontend will
+ * allow better performance in this scenario.
+ *
+ *----------------------- Request Transport Parameters ------------------------
+ *
+ * max-ring-page-order
+ * Values: <uint32_t>
+ * Default Value: 0
+ * Notes: 1, 3
+ *
+ * The maximum supported size of the request ring buffer in units of
+ * lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
+ * etc.).
+ *
+ * max-ring-pages
+ * Values: <uint32_t>
+ * Default Value: 1
+ * Notes: DEPRECATED, 2, 3
+ *
+ * The maximum supported size of the request ring buffer in units of
+ * machine pages. The value must be a power of 2.
+ *
+ *------------------------- Backend Device Properties -------------------------
+ *
+ * discard-alignment
+ * Values: <uint32_t>
+ * Default Value: 0
+ * Notes: 4, 5
+ *
+ * The offset, in bytes from the beginning of the virtual block device,
+ * to the first, addressable, discard extent on the underlying device.
+ *
+ * discard-granularity
+ * Values: <uint32_t>
+ * Default Value: <"sector-size">
+ * Notes: 4
+ *
+ * The size, in bytes, of the individually addressable discard extents
+ * of the underlying device.
+ *
+ * discard-secure
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
+ * requests with the BLKIF_DISCARD_SECURE flag set.
+ *
+ * info
+ * Values: <uint32_t> (bitmap)
+ *
+ * A collection of bit flags describing attributes of the backing
+ * device. The VDISK_* macros define the meaning of each bit
+ * location.
+ *
+ * sector-size
+ * Values: <uint32_t>
+ *
+ * The logical sector size, in bytes, of the backend device.
+ *
+ * physical-sector-size
+ * Values: <uint32_t>
+ *
+ * The physical sector size, in bytes, of the backend device.
+ *
+ * sectors
+ * Values: <uint64_t>
+ *
+ * The size of the backend device, expressed in units of its logical
+ * sector size ("sector-size").
+ *
+ *****************************************************************************
+ * Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ * Values: <uint32_t>
+ *
+ * The identifier of the Xen event channel used to signal activity
+ * in the ring buffer.
+ *
+ * ring-ref
+ * Values: <uint32_t>
+ * Notes: 6
+ *
+ * The Xen grant reference granting permission for the backend to map
+ * the sole page in a single page sized ring buffer.
+ *
+ * ring-ref%u
+ * Values: <uint32_t>
+ * Notes: 6
+ *
+ * For a frontend providing a multi-page ring, a "number of ring pages"
+ * sized list of nodes, each containing a Xen grant reference granting
+ * permission for the backend to map the page of the ring located
+ * at page index "%u". Page indexes are zero based.
+ *
+ * protocol
+ * Values: string (XEN_IO_PROTO_ABI_*)
+ * Default Value: XEN_IO_PROTO_ABI_NATIVE
+ *
+ * The machine ABI rules governing the format of all ring request and
+ * response structures.
+ *
+ * ring-page-order
+ * Values: <uint32_t>
+ * Default Value: 0
+ * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
+ * Notes: 1, 3
+ *
+ * The size of the frontend allocated request ring buffer in units
+ * of lb(machine pages), i.e. log2 (e.g. 0 == 1 page, 1 == 2 pages,
+ * 2 == 4 pages, etc.).
+ *
+ * num-ring-pages
+ * Values: <uint32_t>
+ * Default Value: 1
+ * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order))
+ * Notes: DEPRECATED, 2, 3
+ *
+ * The size of the frontend allocated request ring buffer in units of
+ * machine pages. The value must be a power of 2.
+ *
+ * feature-persistent
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ * Notes: 7, 8, 9
+ *
+ * A value of "1" indicates that the frontend will reuse the same grants
+ * for all transactions, allowing the backend to map them with write
+ * access (even when it should be read-only). If the frontend hits the
+ * maximum number of allowed persistently mapped grants, it can fall
+ * back to non-persistent mode. This will cause a performance
+ * degradation, since the backend driver will still try to map those
+ * grants persistently. Since the persistent grants protocol is
+ * compatible with the previous protocol, a frontend driver can choose
+ * to work in persistent mode even when the backend doesn't support it.
+ *
+ * It is recommended that the frontend driver store the persistently
+ * mapped grants in a LIFO queue, so that a common subset of the
+ * persistently mapped grants is reused. This helps in case the backend
+ * driver decides to limit the maximum number of persistently mapped
+ * grants to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *
+ *------------------------- Virtual Device Properties -------------------------
+ *
+ * device-type
+ * Values: "disk", "cdrom", "floppy", etc.
+ *
+ * virtual-device
+ * Values: <uint32_t>
+ *
+ * A value indicating the physical device to virtualize within the
+ * frontend's domain. (e.g. "The first ATA disk", "The third SCSI
+ * disk", etc.)
+ *
+ * See docs/misc/vbd-interface.txt for details on the format of this
+ * value.
+ *
+ * Notes
+ * -----
+ * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
+ * PV drivers.
+ * (2) Multi-page ring buffer scheme first used in some RedHat distributions
+ * including a distribution deployed on certain nodes of the Amazon
+ * EC2 cluster.
+ * (3) Support for multi-page ring buffers was implemented independently,
+ * in slightly different forms, by both Citrix and RedHat/Amazon.
+ * For full interoperability, block frontends and backends should publish
+ * identical ring parameters, adjusted for unit differences, to the
+ * XenStore nodes used in both schemes.
+ * (4) Devices that support discard functionality may internally allocate
+ * space (discardable extents) in units that are larger than the
+ * exported logical block size.
+ * (5) The discard-alignment parameter allows a physical device to be
+ * partitioned into virtual devices that do not necessarily begin or
+ * end on a discardable extent boundary.
+ * (6) When there is only a single page allocated to the request ring,
+ * 'ring-ref' is used to communicate the grant reference for this
+ * page to the backend. When using a multi-page ring, the 'ring-ref'
+ * node is not created. Instead 'ring-ref0' - 'ring-refN' are used.
+ * (7) When using persistent grants, data has to be copied from/to the page
+ *     where the grant is currently mapped. The overhead of this copy,
+ *     however, does not outweigh the speed improvement of not having to
+ *     unmap the grants.
+ * (8) The frontend driver has to allow the backend driver to map all grants
+ * with write access, even when they should be mapped read-only, since
+ * further requests may reuse these grants and require write permissions.
+ * (9) The Linux implementation doesn't limit the maximum number of
+ *     grants that can be persistently mapped in the frontend driver, but
+ *     due to the frontend driver implementation it should never be bigger
+ *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ */
+
+/*
+ * STATE DIAGRAMS
+ *
+ *****************************************************************************
+ * Startup *
+ *****************************************************************************
+ *
+ * Tool stack creates front and back nodes with state XenbusStateInitialising.
+ *
+ * Front Back
+ * ================================= =====================================
+ * XenbusStateInitialising XenbusStateInitialising
+ * o Query virtual device o Query backend device identification
+ * properties. data.
+ * o Setup OS device instance. o Open and validate backend device.
+ * o Publish backend features and
+ * transport parameters.
+ * |
+ * |
+ * V
+ * XenbusStateInitWait
+ *
+ * o Query backend features and
+ * transport parameters.
+ * o Allocate and initialize the
+ * request ring.
+ * o Publish transport parameters
+ * that will be in effect during
+ * this connection.
+ * |
+ * |
+ * V
+ * XenbusStateInitialised
+ *
+ * o Query frontend transport parameters.
+ * o Connect to the request ring and
+ * event channel.
+ * o Publish backend device properties.
+ * |
+ * |
+ * V
+ * XenbusStateConnected
+ *
+ * o Query backend device properties.
+ * o Finalize OS virtual device
+ * instance.
+ * |
+ * |
+ * V
+ * XenbusStateConnected
+ *
+ * Note: Drivers that do not support any optional features, or the negotiation
+ * of transport parameters, can skip certain states in the state machine:
+ *
+ * o A frontend may transition to XenbusStateInitialised without
+ * waiting for the backend to enter XenbusStateInitWait. In this
+ * case, default transport parameters are in effect and any
+ * transport parameters published by the frontend must contain
+ * their default values.
+ *
+ * o A backend may transition to XenbusStateInitialised, bypassing
+ * XenbusStateInitWait, without waiting for the frontend to first
+ * enter the XenbusStateInitialised state. In this case, default
+ * transport parameters are in effect and any transport parameters
+ * published by the backend must contain their default values.
+ *
+ * Drivers that support optional features and/or transport parameter
+ * negotiation must tolerate these additional state transition paths.
+ * In general this means performing the work of any skipped state
+ * transition, if it has not already been performed, in addition to the
+ * work associated with entry into the current state.
+ */
+
+/*
+ * REQUEST CODES.
+ */
+#define BLKIF_OP_READ 0
+#define BLKIF_OP_WRITE 1
+/*
+ * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
+ * operation code ("barrier request") must be completed prior to the
+ * execution of the barrier request. All writes issued after the barrier
+ * request must not execute until after the completion of the barrier request.
+ *
+ * Optional. See "feature-barrier" XenBus node documentation above.
+ */
+#define BLKIF_OP_WRITE_BARRIER 2
+/*
+ * Commit any uncommitted contents of the backing device's volatile cache
+ * to stable storage.
+ *
+ * Optional. See "feature-flush-cache" XenBus node documentation above.
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE 3
+/*
+ * Used in SLES sources for device specific command packet
+ * contained within the request. Reserved for that purpose.
+ */
+#define BLKIF_OP_RESERVED_1 4
+/*
+ * Indicate to the backend device that a region of storage is no longer in
+ * use, and may be discarded at any time without impact to the client. If
+ * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
+ * discarded region on the device must be rendered unrecoverable before the
+ * command returns.
+ *
+ * This operation is analogous to performing a trim (ATA) or unmap (SCSI)
+ * command on a native device.
+ *
+ * More information about trim/unmap operations can be found at:
+ * http://t13.org/Documents/UploadedDocuments/docs2008/
+ * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
+ * http://www.seagate.com/staticfiles/support/disc/manuals/
+ * Interface%20manuals/100293068c.pdf
+ *
+ * Optional. See "feature-discard", "discard-alignment",
+ * "discard-granularity", and "discard-secure" in the XenBus node
+ * documentation above.
+ */
+#define BLKIF_OP_DISCARD 5
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+/*
+ * NB. first_sect and last_sect in blkif_request_segment, as well as
+ * sector_number in blkif_request, are always expressed in 512-byte units.
+ * However they must be properly aligned to the real sector size of the
+ * physical disk, which is reported in the "physical-sector-size" node in
+ * the backend xenbus info. Also the xenbus "sectors" node is expressed in
+ * 512-byte units.
+ */
+struct blkif_request_segment {
+ grant_ref_t gref; /* reference to I/O buffer frame */
+ /* @first_sect: first sector in frame to transfer (inclusive). */
+ /* @last_sect: last sector in frame to transfer (inclusive). */
+ uint8_t first_sect, last_sect;
+};
+
+/*
+ * Starting ring element for any I/O request.
+ */
+struct blkif_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct blkif_request blkif_request_t;
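+
+/*
+ * Illustrative sketch (not part of the original header): a frontend
+ * built on the ring.h macros might queue a one-segment read roughly as
+ * follows, where `ring' is a blkif_front_ring_t and `gref', `vdev' and
+ * `sect' are hypothetical caller state (a granted buffer frame, the
+ * virtual device handle, and the starting 512-byte sector):
+ *
+ *     blkif_request_t *req;
+ *     int notify;
+ *
+ *     req = RING_GET_REQUEST(&ring, ring.req_prod_pvt);
+ *     req->operation = BLKIF_OP_READ;
+ *     req->nr_segments = 1;
+ *     req->handle = vdev;
+ *     req->id = 1;                    (echoed back in the response)
+ *     req->sector_number = sect;
+ *     req->seg[0].gref = gref;
+ *     req->seg[0].first_sect = 0;
+ *     req->seg[0].last_sect = 7;      (a full 4096-byte frame)
+ *     ring.req_prod_pvt++;
+ *     RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ring, notify);
+ */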
+
+/*
+ * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
+ * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
+ */
+struct blkif_request_discard {
+ uint8_t operation; /* BLKIF_OP_DISCARD */
+ uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
+#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */
+ blkif_vdev_t handle; /* same as for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk */
+ uint64_t nr_sectors; /* number of contiguous sectors to discard*/
+};
+typedef struct blkif_request_discard blkif_request_discard_t;
+
+struct blkif_response {
+ uint64_t id; /* copied from request */
+ uint8_t operation; /* copied from request */
+ int16_t status; /* BLKIF_RSP_??? */
+};
+typedef struct blkif_response blkif_response_t;
+
+/*
+ * STATUS RETURN CODES.
+ */
+ /* Operation not supported (only happens on barrier writes). */
+#define BLKIF_RSP_EOPNOTSUPP -2
+ /* Operation failed for some unspecified reason (-EIO). */
+#define BLKIF_RSP_ERROR -1
+ /* Operation completed successfully. */
+#define BLKIF_RSP_OKAY 0
+
+/*
+ * Generate blkif ring structures and types.
+ */
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
+
+#define VDISK_CDROM 0x1
+#define VDISK_REMOVABLE 0x2
+#define VDISK_READONLY 0x4
+
+#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/console.h
@@ -1,0 +1,51 @@
+/******************************************************************************
+ * console.h
+ *
+ * Console I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
+#define __XEN_PUBLIC_IO_CONSOLE_H__
+
+typedef uint32_t XENCONS_RING_IDX;
+
+#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1))
+
+struct xencons_interface {
+ char in[1024];
+ char out[2048];
+ XENCONS_RING_IDX in_cons, in_prod;
+ XENCONS_RING_IDX out_cons, out_prod;
+};
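+
+/*
+ * Illustrative sketch (not part of the original header): a guest
+ * producing one byte `c' on the output ring, with `intf' pointing at
+ * the shared page and wmb() standing in for the platform write
+ * barrier:
+ *
+ *     XENCONS_RING_IDX prod = intf->out_prod;
+ *
+ *     if (prod - intf->out_cons < sizeof(intf->out)) {
+ *         intf->out[MASK_XENCONS_IDX(prod, intf->out)] = c;
+ *         wmb();               (data visible before the index update)
+ *         intf->out_prod = prod + 1;
+ *     }
+ *
+ * The backend is then kicked via the console event channel.
+ */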
+
+#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/fbif.h
@@ -1,0 +1,176 @@
+/*
+ * fbif.h -- Xen virtual frame buffer device
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <[email protected]>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_IO_FBIF_H__
+#define __XEN_PUBLIC_IO_FBIF_H__
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ */
+
+/* Event type 1 currently not used */
+/*
+ * Framebuffer update notification event
+ * Capable frontend sets feature-update in xenstore.
+ * Backend requests it by setting request-update in xenstore.
+ */
+#define XENFB_TYPE_UPDATE 2
+
+struct xenfb_update
+{
+ uint8_t type; /* XENFB_TYPE_UPDATE */
+ int32_t x; /* source x */
+ int32_t y; /* source y */
+ int32_t width; /* rect width */
+ int32_t height; /* rect height */
+};
+
+/*
+ * Framebuffer resize notification event
+ * Capable backend sets feature-resize in xenstore.
+ */
+#define XENFB_TYPE_RESIZE 3
+
+struct xenfb_resize
+{
+ uint8_t type; /* XENFB_TYPE_RESIZE */
+ int32_t width; /* width in pixels */
+ int32_t height; /* height in pixels */
+ int32_t stride; /* stride in bytes */
+ int32_t depth; /* depth in bits */
+ int32_t offset; /* offset of the framebuffer in bytes */
+};
+
+#define XENFB_OUT_EVENT_SIZE 40
+
+union xenfb_out_event
+{
+ uint8_t type;
+ struct xenfb_update update;
+ struct xenfb_resize resize;
+ char pad[XENFB_OUT_EVENT_SIZE];
+};
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/*
+ * Framebuffer refresh period advice
+ * The backend sends it to advise the frontend of its preferred refresh
+ * period. Frontends that keep the framebuffer constantly up-to-date
+ * just ignore it. Frontends that use the advice should immediately
+ * refresh the framebuffer (and send an update notification event if
+ * those have been requested), then use the update frequency to guide
+ * their periodic refreshes.
+ */
+#define XENFB_TYPE_REFRESH_PERIOD 1
+#define XENFB_NO_REFRESH 0
+
+struct xenfb_refresh_period
+{
+ uint8_t type; /* XENFB_TYPE_REFRESH_PERIOD */
+ uint32_t period; /* period of refresh, in ms,
+ * XENFB_NO_REFRESH if no refresh is needed */
+};
+
+#define XENFB_IN_EVENT_SIZE 40
+
+union xenfb_in_event
+{
+ uint8_t type;
+ struct xenfb_refresh_period refresh_period;
+ char pad[XENFB_IN_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENFB_IN_RING_SIZE 1024
+#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
+#define XENFB_IN_RING_OFFS 1024
+#define XENFB_IN_RING(page) \
+ ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
+#define XENFB_IN_RING_REF(page, idx) \
+ (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
+
+#define XENFB_OUT_RING_SIZE 2048
+#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
+#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
+#define XENFB_OUT_RING(page) \
+ ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
+#define XENFB_OUT_RING_REF(page, idx) \
+ (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
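+
+/*
+ * Illustrative sketch (not part of the original header): a frontend
+ * that has seen request-update in xenstore might publish a dirty
+ * rectangle (x, y, w, h are hypothetical) like this, where `page'
+ * maps the shared xenfb_page:
+ *
+ *     union xenfb_out_event *ev;
+ *     uint32_t prod = page->out_prod;
+ *
+ *     if (prod - page->out_cons < XENFB_OUT_RING_LEN) {
+ *         ev = &XENFB_OUT_RING_REF(page, prod);
+ *         ev->update.type = XENFB_TYPE_UPDATE;
+ *         ev->update.x = x;      ev->update.y = y;
+ *         ev->update.width = w;  ev->update.height = h;
+ *         wmb();           (event visible before the index update)
+ *         page->out_prod = prod + 1;
+ *     }
+ */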
+
+struct xenfb_page
+{
+ uint32_t in_cons, in_prod;
+ uint32_t out_cons, out_prod;
+
+ int32_t width; /* the width of the framebuffer (in pixels) */
+ int32_t height; /* the height of the framebuffer (in pixels) */
+ uint32_t line_length; /* the length of a row of pixels (in bytes) */
+ uint32_t mem_length; /* the length of the framebuffer (in bytes) */
+ uint8_t depth; /* the depth of a pixel (in bits) */
+
+ /*
+ * Framebuffer page directory
+ *
+ * Each directory page holds PAGE_SIZE / sizeof(*pd)
+ * framebuffer pages, and can thus map up to PAGE_SIZE *
+ * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and
+ * sizeof(unsigned long) == 4/8, that's 4 MB on 32-bit and 2 MB on
+ * 64-bit. 256 directories give enough room for a 512 MB
+ * framebuffer with a max resolution of 12,800x10,240. Should
+ * be enough for a while with room leftover for expansion.
+ */
+ unsigned long pd[256];
+};
+
+/*
+ * Wart: xenkbd needs to know default resolution. Put it here until a
+ * better solution is found, but don't leak it to the backend.
+ */
+#ifdef __KERNEL__
+#define XENFB_WIDTH 800
+#define XENFB_HEIGHT 600
+#define XENFB_DEPTH 32
+#endif
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/fsif.h
@@ -1,0 +1,192 @@
+/******************************************************************************
+ * fsif.h
+ *
+ * Interface to FS level split device drivers.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2007, Grzegorz Milos, <[email protected]>.
+ */
+
+#ifndef __XEN_PUBLIC_IO_FSIF_H__
+#define __XEN_PUBLIC_IO_FSIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+#define REQ_FILE_OPEN 1
+#define REQ_FILE_CLOSE 2
+#define REQ_FILE_READ 3
+#define REQ_FILE_WRITE 4
+#define REQ_STAT 5
+#define REQ_FILE_TRUNCATE 6
+#define REQ_REMOVE 7
+#define REQ_RENAME 8
+#define REQ_CREATE 9
+#define REQ_DIR_LIST 10
+#define REQ_CHMOD 11
+#define REQ_FS_SPACE 12
+#define REQ_FILE_SYNC 13
+
+struct fsif_open_request {
+ grant_ref_t gref;
+};
+
+struct fsif_close_request {
+ uint32_t fd;
+};
+
+struct fsif_read_request {
+ uint32_t fd;
+ int32_t pad;
+ uint64_t len;
+ uint64_t offset;
+ grant_ref_t grefs[1]; /* Variable length */
+};
+
+struct fsif_write_request {
+ uint32_t fd;
+ int32_t pad;
+ uint64_t len;
+ uint64_t offset;
+ grant_ref_t grefs[1]; /* Variable length */
+};
+
+struct fsif_stat_request {
+ uint32_t fd;
+};
+
+/* This structure is a copy of some fields from the stat structure,
+ * returned via the ring. */
+struct fsif_stat_response {
+ int32_t stat_mode;
+ uint32_t stat_uid;
+ uint32_t stat_gid;
+ int32_t stat_ret;
+ int64_t stat_size;
+ int64_t stat_atime;
+ int64_t stat_mtime;
+ int64_t stat_ctime;
+};
+
+struct fsif_truncate_request {
+ uint32_t fd;
+ int32_t pad;
+ int64_t length;
+};
+
+struct fsif_remove_request {
+ grant_ref_t gref;
+};
+
+struct fsif_rename_request {
+ uint16_t old_name_offset;
+ uint16_t new_name_offset;
+ grant_ref_t gref;
+};
+
+struct fsif_create_request {
+ int8_t directory;
+ int8_t pad;
+ int16_t pad2;
+ int32_t mode;
+ grant_ref_t gref;
+};
+
+struct fsif_list_request {
+ uint32_t offset;
+ grant_ref_t gref;
+};
+
+#define NR_FILES_SHIFT 0
+#define NR_FILES_SIZE 16 /* 16 bits for the number of files mask */
+#define NR_FILES_MASK (((1ULL << NR_FILES_SIZE) - 1) << NR_FILES_SHIFT)
+#define ERROR_SIZE 32 /* 32 bits for the error mask */
+#define ERROR_SHIFT (NR_FILES_SIZE + NR_FILES_SHIFT)
+#define ERROR_MASK (((1ULL << ERROR_SIZE) - 1) << ERROR_SHIFT)
+#define HAS_MORE_SHIFT (ERROR_SHIFT + ERROR_SIZE)
+#define HAS_MORE_FLAG (1ULL << HAS_MORE_SHIFT)
+
+struct fsif_chmod_request {
+ uint32_t fd;
+ int32_t mode;
+};
+
+struct fsif_space_request {
+ grant_ref_t gref;
+};
+
+struct fsif_sync_request {
+ uint32_t fd;
+};
+
+
+/* FS operation request */
+struct fsif_request {
+ uint8_t type; /* Type of the request */
+ uint8_t pad;
+ uint16_t id; /* Request ID, copied to the response */
+ uint32_t pad2;
+ union {
+ struct fsif_open_request fopen;
+ struct fsif_close_request fclose;
+ struct fsif_read_request fread;
+ struct fsif_write_request fwrite;
+ struct fsif_stat_request fstat;
+ struct fsif_truncate_request ftruncate;
+ struct fsif_remove_request fremove;
+ struct fsif_rename_request frename;
+ struct fsif_create_request fcreate;
+ struct fsif_list_request flist;
+ struct fsif_chmod_request fchmod;
+ struct fsif_space_request fspace;
+ struct fsif_sync_request fsync;
+ } u;
+};
+typedef struct fsif_request fsif_request_t;
+
+/* FS operation response */
+struct fsif_response {
+ uint16_t id;
+ uint16_t pad1;
+ uint32_t pad2;
+ union {
+ uint64_t ret_val;
+ struct fsif_stat_response fstat;
+ } u;
+};
+
+typedef struct fsif_response fsif_response_t;
+
+#define FSIF_RING_ENTRY_SIZE 64
+
+#define FSIF_NR_READ_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_read_request)) / \
+ sizeof(grant_ref_t) + 1)
+#define FSIF_NR_WRITE_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_write_request)) / \
+ sizeof(grant_ref_t) + 1)
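+
+/*
+ * Note (illustrative): the "+ 1" above accounts for the grefs[1]
+ * element already embedded in the request structure.  On a 32-bit ABI
+ * where sizeof(struct fsif_read_request) is 28 bytes and grant_ref_t
+ * is 4 bytes, a 64-byte ring entry thus carries
+ * (64 - 28) / 4 + 1 == 10 grant references per read request.
+ */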
+
+DEFINE_RING_TYPES(fsif, struct fsif_request, struct fsif_response);
+
+#define STATE_INITIALISED "init"
+#define STATE_READY "ready"
+#define STATE_CLOSING "closing"
+#define STATE_CLOSED "closed"
+
+
+#endif
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/kbdif.h
@@ -1,0 +1,132 @@
+/*
+ * kbdif.h -- Xen virtual keyboard/mouse
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <[email protected]>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_IO_KBDIF_H__
+#define __XEN_PUBLIC_IO_KBDIF_H__
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/* Pointer movement event */
+#define XENKBD_TYPE_MOTION 1
+/* Event type 2 currently not used */
+/* Key event (includes pointer buttons) */
+#define XENKBD_TYPE_KEY 3
+/*
+ * Pointer position event
+ * Capable backend sets feature-abs-pointer in xenstore.
+ * Frontend requests it instead of XENKBD_TYPE_MOTION by setting
+ * request-abs-pointer in xenstore.
+ */
+#define XENKBD_TYPE_POS 4
+
+struct xenkbd_motion
+{
+ uint8_t type; /* XENKBD_TYPE_MOTION */
+ int32_t rel_x; /* relative X motion */
+ int32_t rel_y; /* relative Y motion */
+ int32_t rel_z; /* relative Z motion (wheel) */
+};
+
+struct xenkbd_key
+{
+ uint8_t type; /* XENKBD_TYPE_KEY */
+ uint8_t pressed; /* 1 if pressed; 0 otherwise */
+ uint32_t keycode; /* KEY_* from linux/input.h */
+};
+
+struct xenkbd_position
+{
+ uint8_t type; /* XENKBD_TYPE_POS */
+ int32_t abs_x; /* absolute X position (in FB pixels) */
+ int32_t abs_y; /* absolute Y position (in FB pixels) */
+ int32_t rel_z; /* relative Z motion (wheel) */
+};
+
+#define XENKBD_IN_EVENT_SIZE 40
+
+union xenkbd_in_event
+{
+ uint8_t type;
+ struct xenkbd_motion motion;
+ struct xenkbd_key key;
+ struct xenkbd_position pos;
+ char pad[XENKBD_IN_EVENT_SIZE];
+};
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ * No out events currently defined.
+ */
+
+#define XENKBD_OUT_EVENT_SIZE 40
+
+union xenkbd_out_event
+{
+ uint8_t type;
+ char pad[XENKBD_OUT_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENKBD_IN_RING_SIZE 2048
+#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
+#define XENKBD_IN_RING_OFFS 1024
+#define XENKBD_IN_RING(page) \
+ ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
+#define XENKBD_IN_RING_REF(page, idx) \
+ (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
+
+#define XENKBD_OUT_RING_SIZE 1024
+#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
+#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
+#define XENKBD_OUT_RING(page) \
+ ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
+#define XENKBD_OUT_RING_REF(page, idx) \
+ (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
+
+struct xenkbd_page
+{
+ uint32_t in_cons, in_prod;
+ uint32_t out_cons, out_prod;
+};
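+
+/*
+ * Illustrative sketch (not part of the original header): a frontend
+ * draining pending in-events, with `page' mapping the shared page,
+ * rmb()/mb() standing in for the platform barriers and handle_event()
+ * a hypothetical dispatcher:
+ *
+ *     uint32_t cons = page->in_cons;
+ *     uint32_t prod = page->in_prod;
+ *
+ *     rmb();                (read events after the producer index)
+ *     while (cons != prod)
+ *         handle_event(&XENKBD_IN_RING_REF(page, cons++));
+ *     mb();
+ *     page->in_cons = cons;
+ */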
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/libxenvchan.h
@@ -1,0 +1,97 @@
+/**
+ * @file
+ * @section AUTHORS
+ *
+ * Copyright (C) 2010 Rafal Wojtczuk <[email protected]>
+ *
+ * Authors:
+ * Rafal Wojtczuk <[email protected]>
+ * Daniel De Graaf <[email protected]>
+ *
+ * @section LICENSE
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * @section DESCRIPTION
+ *
+ * Originally borrowed from the Qubes OS Project, http://www.qubes-os.org,
+ * this code has been substantially rewritten to use the gntdev and gntalloc
+ * devices instead of raw MFNs and map_foreign_range.
+ *
+ * This is a library for inter-domain communication. A standard Xen ring
+ * buffer is used, with a datagram-based interface built on top. The grant
+ * reference and event channels are shared in XenStore under a user-specified
+ * path.
+ *
+ * The ring.h macros define an asymmetric interface to a shared data structure
+ * that assumes all rings reside in a single contiguous memory space. This is
+ * not suitable for vchan because the interface to the ring is symmetric except
+ * for the setup. Unlike the producer-consumer rings defined in ring.h, the
+ * sizes of the rings used in vchan are determined at execution time instead of
+ * compile time, so the macros in ring.h cannot be used to access the rings.
+ */
+
+#include <stdint.h>
+#include <sys/types.h>
+
+struct ring_shared {
+ uint32_t cons, prod;
+};
+
+#define VCHAN_NOTIFY_WRITE 0x1
+#define VCHAN_NOTIFY_READ 0x2
+
+/**
+ * vchan_interface: primary shared data structure
+ */
+struct vchan_interface {
+ /**
+ * Standard consumer/producer interface, one pair per buffer
+ * left is client write, server read
+ * right is client read, server write
+ */
+ struct ring_shared left, right;
+ /**
+ * size of the rings, which determines their location
+ * 10 - at offset 1024 in ring's page
+ * 11 - at offset 2048 in ring's page
+ * 12+ - uses 2^(N-12) grants to describe the multi-page ring
+ * These should remain constant once the page is shared.
+ * Only one of the two orders can be 10 (or 11).
+ */
+ uint16_t left_order, right_order;
+ /**
+ * Shutdown detection:
+ * 0: client (or server) has exited
+ * 1: client (or server) is connected
+ * 2: client has not yet connected
+ */
+ uint8_t cli_live, srv_live;
+ /**
+ * Notification bits:
+ * VCHAN_NOTIFY_WRITE: send notify when data is written
+ * VCHAN_NOTIFY_READ: send notify when data is read (consumed)
+ * cli_notify is used for the client to inform the server of its action
+ */
+ uint8_t cli_notify, srv_notify;
+ /**
+ * Grant list: ordering is left, right. Must not extend into actual ring
+ * or grow beyond the end of the initial shared page.
+ * These should remain constant once the page is shared, to allow
+ * for possible remapping by a client that restarts.
+ */
+ uint32_t grants[0];
+};
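+
+/*
+ * Note (illustrative): for the in-page ring orders the offset of a
+ * ring within the shared page equals its size, so a ring of order 10
+ * or 11 can be located as:
+ *
+ *     size = 1 << order;                  (1024 or 2048 bytes)
+ *     data = (uint8_t *)intf + size;
+ *
+ * For order >= 12 the ring data lives in the extra pages named by
+ * grants[].
+ */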
+
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/netif.h
@@ -1,0 +1,236 @@
+/******************************************************************************
+ * netif.h
+ *
+ * Unified network-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_NETIF_H__
+#define __XEN_PUBLIC_IO_NETIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Older implementations of the Xen network frontend / backend have an
+ * implicit dependency on MAX_SKB_FRAGS as the maximum number of
+ * ring slots an skb can use. Netfront / netback may not work as
+ * expected when frontend and backend have different MAX_SKB_FRAGS.
+ *
+ * A better approach is to add mechanism for netfront / netback to
+ * negotiate this value. However we cannot fix all possible
+ * frontends, so we need to define a value which states the minimum
+ * slots backend must support.
+ *
+ * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS
+ * (18), which has proved to work with most frontends. Any new backend
+ * which doesn't negotiate with frontend should expect frontend to
+ * send a valid packet using slots up to this value.
+ */
+#define XEN_NETIF_NR_SLOTS_MIN 18
+
+/*
+ * Notifications after enqueuing any type of message should be conditional on
+ * the appropriate req_event or rsp_event field in the shared ring.
+ * If the client sends notification for rx requests then it should specify
+ * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
+ * that it cannot safely queue packets (as it may not be kicked to send them).
+ */
+
+/*
+ * "feature-split-event-channels" is introduced to separate guest TX
+ * and RX notification. Backend either doesn't support this feature or
+ * advertises it via xenstore as 0 (disabled) or 1 (enabled).
+ *
+ * To make use of this feature, the frontend should allocate two event
+ * channels for TX and RX, and advertise them to the backend as
+ * "event-channel-tx" and "event-channel-rx" respectively. If the
+ * frontend doesn't want to use this feature, it just writes the
+ * "event-channel" node as before.
+ */
+
+/*
+ * This is the 'wire' format for packets:
+ * Request 1: netif_tx_request -- NETTXF_* (any flags)
+ * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info)
+ * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE)
+ * Request 4: netif_tx_request -- NETTXF_more_data
+ * Request 5: netif_tx_request -- NETTXF_more_data
+ * ...
+ * Request N: netif_tx_request -- 0
+ */
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETTXF_csum_blank (0)
+#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank)
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETTXF_data_validated (1)
+#define NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the next request descriptor. */
+#define _NETTXF_more_data (2)
+#define NETTXF_more_data (1U<<_NETTXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETTXF_extra_info (3)
+#define NETTXF_extra_info (1U<<_NETTXF_extra_info)
+
+#define XEN_NETIF_MAX_TX_SIZE 0xFFFF
+struct netif_tx_request {
+ grant_ref_t gref; /* Reference to buffer page */
+ uint16_t offset; /* Offset within buffer page */
+ uint16_t flags; /* NETTXF_* */
+ uint16_t id; /* Echoed in response message. */
+ uint16_t size; /* Packet size in bytes. */
+};
+typedef struct netif_tx_request netif_tx_request_t;
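+
+/*
+ * Illustrative sketch (not part of the original header): filling a
+ * single-slot transmit request for a packet that fits in one granted
+ * frame, where `ring' is a netif_tx_front_ring_t and `gref', `off' and
+ * `len' are hypothetical caller state:
+ *
+ *     netif_tx_request_t *txreq;
+ *
+ *     txreq = RING_GET_REQUEST(&ring, ring.req_prod_pvt);
+ *     txreq->gref = gref;
+ *     txreq->offset = off;
+ *     txreq->flags = NETTXF_csum_blank | NETTXF_data_validated;
+ *     txreq->id = 0;                  (echoed in the response)
+ *     txreq->size = len;              (whole packet size in bytes)
+ *     ring.req_prod_pvt++;
+ */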
+
+/* Types of netif_extra_info descriptors. */
+#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */
+#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MAX (4)
+
+/* netif_extra_info flags. */
+#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
+#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
+
+/* GSO types - only TCPv4 currently supported. */
+#define XEN_NETIF_GSO_TYPE_TCPV4 (1)
+
+/*
+ * This structure needs to fit within both netif_tx_request and
+ * netif_rx_response for compatibility.
+ */
+struct netif_extra_info {
+ uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */
+ uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */
+
+ union {
+ /*
+ * XEN_NETIF_EXTRA_TYPE_GSO:
+ */
+ struct {
+ /*
+ * Maximum payload size of each segment. For example, for TCP this
+ * is just the path MSS.
+ */
+ uint16_t size;
+
+ /*
+ * GSO type. This determines the protocol of the packet and any
+ * extra features required to segment the packet properly.
+ */
+ uint8_t type; /* XEN_NETIF_GSO_TYPE_* */
+
+ /* Future expansion. */
+ uint8_t pad;
+
+ /*
+ * GSO features. This specifies any extra GSO features required
+ * to process this packet, such as ECN support for TCPv4.
+ */
+ uint16_t features; /* XEN_NETIF_GSO_FEAT_* */
+ } gso;
+
+ /*
+ * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
+ * Backend advertises availability via 'feature-multicast-control'
+ * xenbus node containing value '1'.
+ * Frontend requests this feature by advertising
+ * 'request-multicast-control' xenbus node containing value '1'.
+ * If multicast control is requested then multicast flooding is
+ * disabled and the frontend must explicitly register its interest
+ * in multicast groups using dummy transmit requests containing
+ * MCAST_{ADD,DEL} extra-info fragments.
+ */
+ struct {
+ uint8_t addr[6]; /* Address to add/remove. */
+ } mcast;
+
+ uint16_t pad[3];
+ } u;
+};
+typedef struct netif_extra_info netif_extra_info_t;
+
+struct netif_tx_response {
+ uint16_t id;
+ int16_t status; /* NETIF_RSP_* */
+};
+typedef struct netif_tx_response netif_tx_response_t;
+
+struct netif_rx_request {
+ uint16_t id; /* Echoed in response message. */
+ grant_ref_t gref; /* Reference to incoming granted frame */
+};
+typedef struct netif_rx_request netif_rx_request_t;
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETRXF_data_validated (0)
+#define NETRXF_data_validated (1U<<_NETRXF_data_validated)
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETRXF_csum_blank (1)
+#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank)
+
+/* Packet continues in the next request descriptor. */
+#define _NETRXF_more_data (2)
+#define NETRXF_more_data (1U<<_NETRXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETRXF_extra_info (3)
+#define NETRXF_extra_info (1U<<_NETRXF_extra_info)
+
+struct netif_rx_response {
+ uint16_t id;
+ uint16_t offset; /* Offset in page of start of received packet */
+ uint16_t flags; /* NETRXF_* */
+ int16_t status; /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */
+};
+typedef struct netif_rx_response netif_rx_response_t;
+
+/*
+ * Generate netif ring structures and types.
+ */
+
+DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
+DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
+
+#define NETIF_RSP_DROPPED -2
+#define NETIF_RSP_ERROR -1
+#define NETIF_RSP_OKAY 0
+/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
+#define NETIF_RSP_NULL 1
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/pciif.h
@@ -1,0 +1,124 @@
+/*
+ * PCI Backend/Frontend Common Data Structures & Macros
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ryan Wilson <[email protected]>
+ */
+#ifndef __XEN_PCI_COMMON_H__
+#define __XEN_PCI_COMMON_H__
+
+/* Be sure to bump this number if you change this file */
+#define XEN_PCI_MAGIC "7"
+
+/* xen_pci_sharedinfo flags */
+#define _XEN_PCIF_active (0)
+#define XEN_PCIF_active (1<<_XEN_PCIF_active)
+#define _XEN_PCIB_AERHANDLER (1)
+#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER)
+#define _XEN_PCIB_active (2)
+#define XEN_PCIB_active (1<<_XEN_PCIB_active)
+
+/* xen_pci_op commands */
+#define XEN_PCI_OP_conf_read (0)
+#define XEN_PCI_OP_conf_write (1)
+#define XEN_PCI_OP_enable_msi (2)
+#define XEN_PCI_OP_disable_msi (3)
+#define XEN_PCI_OP_enable_msix (4)
+#define XEN_PCI_OP_disable_msix (5)
+#define XEN_PCI_OP_aer_detected (6)
+#define XEN_PCI_OP_aer_resume (7)
+#define XEN_PCI_OP_aer_mmio (8)
+#define XEN_PCI_OP_aer_slotreset (9)
+
+/* xen_pci_op error numbers */
+#define XEN_PCI_ERR_success (0)
+#define XEN_PCI_ERR_dev_not_found (-1)
+#define XEN_PCI_ERR_invalid_offset (-2)
+#define XEN_PCI_ERR_access_denied (-3)
+#define XEN_PCI_ERR_not_implemented (-4)
+/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */
+#define XEN_PCI_ERR_op_failed (-5)
+
+/*
+ * It should be (PAGE_SIZE - sizeof(struct xen_pci_op)) /
+ * sizeof(struct xen_msix_entry), and should not exceed 128.
+ */
+#define SH_INFO_MAX_VEC 128
+
+struct xen_msix_entry {
+ uint16_t vector;
+ uint16_t entry;
+};
+struct xen_pci_op {
+ /* IN: what action to perform: XEN_PCI_OP_* */
+ uint32_t cmd;
+
+ /* OUT: will contain an error number (if any) from errno.h */
+ int32_t err;
+
+ /* IN: which device to touch */
+ uint32_t domain; /* PCI Domain/Segment */
+ uint32_t bus;
+ uint32_t devfn;
+
+ /* IN: which configuration registers to touch */
+ int32_t offset;
+ int32_t size;
+
+ /* IN/OUT: Contains the result after a READ or the value to WRITE */
+ uint32_t value;
+ /* IN: Contains extra info for this operation */
+ uint32_t info;
+ /* IN: param for MSI-X */
+ struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC];
+};
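+
+/*
+ * Illustrative sketch (not part of the original header): the frontend
+ * typically copies a request into the shared structure, raises
+ * XEN_PCIF_active, kicks the event channel, and waits for the backend
+ * to clear the flag:
+ *
+ *     info->op = my_op;         (cmd, domain/bus/devfn, offset, ...)
+ *     wmb();
+ *     info->flags |= XEN_PCIF_active;
+ *     (notify the backend, then wait until
+ *      (info->flags & XEN_PCIF_active) == 0)
+ *     result = info->op.value;
+ *     err = info->op.err;
+ */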
+
+/* used for PCIe AER handling */
+struct xen_pcie_aer_op
+{
+
+ /* IN: what action to perform: XEN_PCI_OP_* */
+ uint32_t cmd;
+ /* IN/OUT: return aer_op result or carry error_detected state as input */
+ int32_t err;
+
+ /* IN: which device to touch */
+ uint32_t domain; /* PCI Domain/Segment*/
+ uint32_t bus;
+ uint32_t devfn;
+};
+struct xen_pci_sharedinfo {
+ /* flags - XEN_PCIF_* */
+ uint32_t flags;
+ struct xen_pci_op op;
+ struct xen_pcie_aer_op aer_op;
+};
+
+#endif /* __XEN_PCI_COMMON_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/protocols.h
@@ -1,0 +1,40 @@
+/******************************************************************************
+ * protocols.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PROTOCOLS_H__
+#define __XEN_PROTOCOLS_H__
+
+#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi"
+#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi"
+#define XEN_IO_PROTO_ABI_ARM "arm-abi"
+
+#if defined(__i386__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
+#elif defined(__x86_64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
+#elif defined(__arm__) || defined(__aarch64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM
+#else
+# error arch fixup needed here
+#endif
+
+#endif
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/ring.h
@@ -1,0 +1,312 @@
+/******************************************************************************
+ * ring.h
+ *
+ * Shared producer-consumer ring macros.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Tim Deegan and Andrew Warfield November 2004.
+ */
+
+#ifndef __XEN_PUBLIC_IO_RING_H__
+#define __XEN_PUBLIC_IO_RING_H__
+
+#include "../xen-compat.h"
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030208
+#define xen_mb() mb()
+#define xen_rmb() rmb()
+#define xen_wmb() wmb()
+#endif
+
+typedef unsigned int RING_IDX;
+
+/* Round a 32-bit unsigned constant down to the nearest power of two. */
+#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1))
+#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x))
+#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x))
+#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x))
+#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
+
+/*
+ * Calculate size of a shared ring, given the total available space for the
+ * ring and indexes (_sz), and the name tag of the request/response structure.
+ * A ring contains as many entries as will fit, rounded down to the nearest
+ * power of two (so we can mask with (size-1) to loop around).
+ */
+#define __CONST_RING_SIZE(_s, _sz) \
+ (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
+ sizeof(((struct _s##_sring *)0)->ring[0])))
+/*
+ * The same for passing in an actual pointer instead of a name tag.
+ */
+#define __RING_SIZE(_s, _sz) \
+ (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
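+
+/*
+ * Worked example (illustrative): with a 4096-byte shared page, the 64
+ * bytes of indexes and padding that precede the ring array, and
+ * 64-byte entries, (4096 - 64) / 64 == 63 entries would fit, which
+ * __RD32 rounds down to a ring size of 32.
+ */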
+
+/*
+ * Macros to make the correct C datatypes for a new kind of ring.
+ *
+ * To make a new ring datatype, you need to have two message structures,
+ * let's say request_t, and response_t already defined.
+ *
+ * In a header where you want the ring datatype declared, you then do:
+ *
+ * DEFINE_RING_TYPES(mytag, request_t, response_t);
+ *
+ * These expand out to give you a set of types, as you can see below.
+ * The most important of these are:
+ *
+ * mytag_sring_t - The shared ring.
+ * mytag_front_ring_t - The 'front' half of the ring.
+ * mytag_back_ring_t - The 'back' half of the ring.
+ *
+ * To initialize a ring in your code you need to know the location and size
+ * of the shared memory area (PAGE_SIZE, for instance). To initialize
+ * the front half:
+ *
+ * mytag_front_ring_t front_ring;
+ * SHARED_RING_INIT((mytag_sring_t *)shared_page);
+ * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *
+ * Initializing the back follows similarly (note that only the front
+ * initializes the shared ring):
+ *
+ * mytag_back_ring_t back_ring;
+ * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ */
+
+#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \
+ \
+/* Shared ring entry */ \
+union __name##_sring_entry { \
+ __req_t req; \
+ __rsp_t rsp; \
+}; \
+ \
+/* Shared ring page */ \
+struct __name##_sring { \
+ RING_IDX req_prod, req_event; \
+ RING_IDX rsp_prod, rsp_event; \
+ union { \
+ struct { \
+ uint8_t smartpoll_active; \
+ } netif; \
+ struct { \
+ uint8_t msg; \
+ } tapif_user; \
+ uint8_t pvt_pad[4]; \
+ } private; \
+ uint8_t __pad[44]; \
+ union __name##_sring_entry ring[1]; /* variable-length */ \
+}; \
+ \
+/* "Front" end's private variables */ \
+struct __name##_front_ring { \
+ RING_IDX req_prod_pvt; \
+ RING_IDX rsp_cons; \
+ unsigned int nr_ents; \
+ struct __name##_sring *sring; \
+}; \
+ \
+/* "Back" end's private variables */ \
+struct __name##_back_ring { \
+ RING_IDX rsp_prod_pvt; \
+ RING_IDX req_cons; \
+ unsigned int nr_ents; \
+ struct __name##_sring *sring; \
+}; \
+ \
+/* Syntactic sugar */ \
+typedef struct __name##_sring __name##_sring_t; \
+typedef struct __name##_front_ring __name##_front_ring_t; \
+typedef struct __name##_back_ring __name##_back_ring_t
+
+/*
+ * Macros for manipulating rings.
+ *
+ * FRONT_RING_whatever works on the "front end" of a ring: here
+ * requests are pushed on to the ring and responses taken off it.
+ *
+ * BACK_RING_whatever works on the "back end" of a ring: here
+ * requests are taken off the ring and responses put on.
+ *
+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
+ * This is OK in 1-for-1 request-response situations where the
+ * requestor (front end) never has more than RING_SIZE()-1
+ * outstanding requests.
+ */
+
+/* Initialising empty rings */
+#define SHARED_RING_INIT(_s) do { \
+ (_s)->req_prod = (_s)->rsp_prod = 0; \
+ (_s)->req_event = (_s)->rsp_event = 1; \
+ (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \
+ (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \
+} while(0)
+
+#define FRONT_RING_INIT(_r, _s, __size) do { \
+ (_r)->req_prod_pvt = 0; \
+ (_r)->rsp_cons = 0; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+ (_r)->sring = (_s); \
+} while (0)
+
+#define BACK_RING_INIT(_r, _s, __size) do { \
+ (_r)->rsp_prod_pvt = 0; \
+ (_r)->req_cons = 0; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+ (_r)->sring = (_s); \
+} while (0)
+
+/* How big is this ring? */
+#define RING_SIZE(_r) \
+ ((_r)->nr_ents)
+
+/* Number of free requests (for use on front side only). */
+#define RING_FREE_REQUESTS(_r) \
+ (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
+/* Test if there is an empty slot available on the front ring.
+ * (This is only meaningful from the front. )
+ */
+#define RING_FULL(_r) \
+ (RING_FREE_REQUESTS(_r) == 0)
+
+/* Test if there are outstanding messages to be processed on a ring. */
+#define RING_HAS_UNCONSUMED_RESPONSES(_r) \
+ ((_r)->sring->rsp_prod - (_r)->rsp_cons)
+
+#ifdef __GNUC__
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \
+ unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \
+ unsigned int rsp = RING_SIZE(_r) - \
+ ((_r)->req_cons - (_r)->rsp_prod_pvt); \
+ req < rsp ? req : rsp; \
+})
+#else
+/* Same as above, but without the nice GCC ({ ... }) syntax. */
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) \
+ ((((_r)->sring->req_prod - (_r)->req_cons) < \
+ (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \
+ ((_r)->sring->req_prod - (_r)->req_cons) : \
+ (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
+#endif
+
+/* Direct access to individual ring elements, by index. */
+#define RING_GET_REQUEST(_r, _idx) \
+ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
+
+#define RING_GET_RESPONSE(_r, _idx) \
+ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
+
+/* Loop termination condition: Would the specified index overflow the ring? */
+#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \
+ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+
+/* Ill-behaved frontend determination: Can there be this many requests? */
+#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \
+ (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
+
+#define RING_PUSH_REQUESTS(_r) do { \
+ xen_wmb(); /* back sees requests /before/ updated producer index */ \
+ (_r)->sring->req_prod = (_r)->req_prod_pvt; \
+} while (0)
+
+#define RING_PUSH_RESPONSES(_r) do { \
+ xen_wmb(); /* front sees resps /before/ updated producer index */ \
+ (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \
+} while (0)
+
+/*
+ * Notification hold-off (req_event and rsp_event):
+ *
+ * When queueing requests or responses on a shared ring, it may not always be
+ * necessary to notify the remote end. For example, if requests are in flight
+ * in a backend, the front may be able to queue further requests without
+ * notifying the back (if the back checks for new requests when it queues
+ * responses).
+ *
+ * When enqueuing requests or responses:
+ *
+ * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
+ * is a boolean return value. True indicates that the receiver requires an
+ * asynchronous notification.
+ *
+ * After dequeuing requests or responses (before sleeping the connection):
+ *
+ * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
+ * The second argument is a boolean return value. True indicates that there
+ * are pending messages on the ring (i.e., the connection should not be put
+ * to sleep).
+ *
+ * These macros will set the req_event/rsp_event field to trigger a
+ * notification on the very next message that is enqueued. If you want to
+ * create batches of work (i.e., only receive a notification after several
+ * messages have been enqueued) then you will need to create a customised
+ * version of the FINAL_CHECK macro in your own code, which sets the event
+ * field appropriately.
+ */
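+
+/*
+ * Illustrative sketch (not part of the original header): a typical
+ * frontend enqueue path, where notify_backend() is a hypothetical
+ * event-channel kick:
+ *
+ *     int notify;
+ *
+ *     (fill in RING_GET_REQUEST(&ring, ring.req_prod_pvt), then:)
+ *     ring.req_prod_pvt++;
+ *     RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ring, notify);
+ *     if (notify)
+ *         notify_backend();
+ */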
+
+#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \
+ RING_IDX __old = (_r)->sring->req_prod; \
+ RING_IDX __new = (_r)->req_prod_pvt; \
+ xen_wmb(); /* back sees requests /before/ updated producer index */ \
+ (_r)->sring->req_prod = __new; \
+ xen_mb(); /* back sees new requests /before/ we check req_event */ \
+ (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \
+ (RING_IDX)(__new - __old)); \
+} while (0)
+
+#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \
+ RING_IDX __old = (_r)->sring->rsp_prod; \
+ RING_IDX __new = (_r)->rsp_prod_pvt; \
+ xen_wmb(); /* front sees resps /before/ updated producer index */ \
+ (_r)->sring->rsp_prod = __new; \
+ xen_mb(); /* front sees new resps /before/ we check rsp_event */ \
+ (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \
+ (RING_IDX)(__new - __old)); \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \
+ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
+ if (_work_to_do) break; \
+ (_r)->sring->req_event = (_r)->req_cons + 1; \
+ xen_mb(); \
+ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \
+ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
+ if (_work_to_do) break; \
+ (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \
+ xen_mb(); \
+ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
+} while (0)
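+
+/*
+ * Illustrative sketch (not part of the interface): a minimal backend
+ * consume loop built from the macros above, assuming a hypothetical ring
+ * instantiated with DEFINE_RING_TYPES(myif, ...) and guest-provided
+ * process() and notify_remote() helpers.
+ *
+ *     myif_back_ring_t *r;
+ *     int more, notify;
+ *
+ *     for (;;) {
+ *         RING_FINAL_CHECK_FOR_REQUESTS(r, more);
+ *         if (!more)
+ *             break;                  (peer will notify on next request)
+ *         process(RING_GET_REQUEST(r, r->req_cons++),
+ *                 RING_GET_RESPONSE(r, r->rsp_prod_pvt++));
+ *         RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(r, notify);
+ *         if (notify)
+ *             notify_remote();
+ *     }
+ */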
+
+#endif /* __XEN_PUBLIC_IO_RING_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/tpmif.h
@@ -1,0 +1,143 @@
+/******************************************************************************
+ * tpmif.h
+ *
+ * TPM I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, IBM Corporation
+ *
+ * Author: Stefan Berger, [email protected]
+ * Grant table support: Mahadevan Gomathisankaran
+ *
+ * This code has been derived from tools/libxc/xen/io/netif.h
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_TPMIF_H__
+#define __XEN_PUBLIC_IO_TPMIF_H__
+
+#include "../grant_table.h"
+
+struct tpmif_tx_request {
+ unsigned long addr; /* Machine address of packet. */
+ grant_ref_t ref; /* grant table access reference */
+ uint16_t unused;
+ uint16_t size; /* Packet size in bytes. */
+};
+typedef struct tpmif_tx_request tpmif_tx_request_t;
+
+/*
+ * TPMIF_TX_RING_SIZE defines the number of pages the frontend and
+ * backend can exchange (i.e., the size of the ring array).
+ */
+typedef uint32_t TPMIF_RING_IDX;
+
+#define TPMIF_TX_RING_SIZE 1
+
+/* This structure must fit in a memory page. */
+
+struct tpmif_ring {
+ struct tpmif_tx_request req;
+};
+typedef struct tpmif_ring tpmif_ring_t;
+
+struct tpmif_tx_interface {
+ struct tpmif_ring ring[TPMIF_TX_RING_SIZE];
+};
+typedef struct tpmif_tx_interface tpmif_tx_interface_t;
+
+/******************************************************************************
+ * TPM I/O interface for Xen guest OSes, v2
+ *
+ * Author: Daniel De Graaf <[email protected]>
+ *
+ * This protocol emulates the request/response behavior of a TPM using a Xen
+ * shared memory interface. All interaction with the TPM is at the direction
+ * of the frontend, since a TPM (hardware or virtual) is a passive device -
+ * the backend only processes commands as requested by the frontend.
+ *
+ * The frontend sends a request to the TPM by populating the shared page with
+ * the request packet, changing the state to TPMIF_STATE_SUBMIT, and sending
+ * an event channel notification. When the backend is finished, it will set
+ * the state to TPMIF_STATE_FINISH and send an event channel notification.
+ *
+ * In order to allow long-running commands to be canceled, the frontend can
+ * at any time change the state to TPMIF_STATE_CANCEL and send a notification.
+ * The TPM can either finish the command (changing state to TPMIF_STATE_FINISH)
+ * or can cancel the command and change the state to TPMIF_STATE_IDLE. The TPM
+ * can also change the state to TPMIF_STATE_IDLE instead of TPMIF_STATE_FINISH
+ * if another reason for cancellation is required - for example, a physical
+ * TPM may cancel a command if the interface is seized by another locality.
+ *
+ * The TPM command format is defined by the TCG, and is available at
+ * http://www.trustedcomputinggroup.org/resources/tpm_main_specification
+ */
+
+enum tpmif_state {
+ TPMIF_STATE_IDLE, /* no contents / vTPM idle / cancel complete */
+ TPMIF_STATE_SUBMIT, /* request ready / vTPM working */
+ TPMIF_STATE_FINISH, /* response ready / vTPM idle */
+ TPMIF_STATE_CANCEL, /* cancel requested / vTPM working */
+};
+/* Note: The backend should only change state to IDLE or FINISH, while the
+ * frontend should only change to SUBMIT or CANCEL. Status changes do not need
+ * to use atomic operations.
+ */
+
+
+/* The shared page for vTPM request/response packets looks like:
+ *
+ * Offset Contents
+ * =================================================
+ * 0 struct tpmif_shared_page
+ * 16 [optional] List of grant IDs
+ * 16+4*nr_extra_pages TPM packet data
+ *
+ * If the TPM packet data extends beyond the end of a single page, the grant IDs
+ * defined in extra_pages are used as if they were mapped immediately following
+ * the primary shared page. The grants are allocated by the frontend and mapped
+ * by the backend. Before sending a request spanning multiple pages, the
+ * frontend should verify that the TPM supports such large requests by querying
+ * the TPM_CAP_PROP_INPUT_BUFFER property from the TPM.
+ */
+struct tpmif_shared_page {
+ uint32_t length; /* request/response length in bytes */
+
+ uint8_t state; /* enum tpmif_state */
+ uint8_t locality; /* for the current request */
+ uint8_t pad; /* should be zero */
+
+ uint8_t nr_extra_pages; /* extra pages for long packets; may be zero */
+ uint32_t extra_pages[0]; /* grant IDs; length is actually nr_extra_pages */
+};
+typedef struct tpmif_shared_page tpmif_shared_page_t;
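+
+/*
+ * Illustrative sketch (not part of the interface): how a frontend might
+ * submit a command under the v2 protocol. The copy helper, write barrier
+ * and event-channel notification are assumed to come from the guest OS.
+ *
+ *     memcpy(data_area, cmd, cmdlen);     (data area follows the header)
+ *     shr->length = cmdlen;
+ *     shr->locality = locality;
+ *     wmb();                              (payload visible before state)
+ *     shr->state = TPMIF_STATE_SUBMIT;
+ *     notify_evtchn(port);
+ *     ... wait until shr->state is TPMIF_STATE_FINISH (response in the
+ *     data area) or TPMIF_STATE_IDLE (command cancelled) ...
+ */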
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/usbif.h
@@ -1,0 +1,150 @@
+/*
+ * usbif.h
+ *
+ * USB I/O interface for Xen guest OSes.
+ *
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
+ * Author: Noboru Iwamatsu <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_IO_USBIF_H__
+#define __XEN_PUBLIC_IO_USBIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+enum usb_spec_version {
+ USB_VER_UNKNOWN = 0,
+ USB_VER_USB11,
+ USB_VER_USB20,
+ USB_VER_USB30, /* not supported yet */
+};
+
+/*
+ * USB pipe in usbif_request
+ *
+ * bits 0-5 are specific bits for virtual USB driver.
+ * bits 7-31 are standard urb pipe.
+ *
+ * - port number (NEW): bits 0-4
+ * (USB_MAXCHILDREN is 31)
+ *
+ * - operation flag (NEW): bit 5
+ * (0 = submit urb,
+ * 1 = unlink urb)
+ *
+ * - direction: bit 7
+ * (0 = Host-to-Device [Out]
+ * 1 = Device-to-Host [In])
+ *
+ * - device address: bits 8-14
+ *
+ * - endpoint: bits 15-18
+ *
+ * - pipe type: bits 30-31
+ * (00 = isochronous, 01 = interrupt,
+ * 10 = control, 11 = bulk)
+ */
+#define usbif_pipeportnum(pipe) ((pipe) & 0x1f)
+#define usbif_setportnum_pipe(pipe, portnum) \
+ ((pipe)|(portnum))
+
+#define usbif_pipeunlink(pipe) ((pipe) & 0x20)
+#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe))
+#define usbif_setunlink_pipe(pipe) ((pipe)|(0x20))
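+
+/*
+ * Illustrative sketch: decoding the virtual-driver bits of a request's
+ * pipe value with the macros above (backend side; cancel_urb() and
+ * submit_urb() are hypothetical helpers).
+ *
+ *     port = usbif_pipeportnum(req->pipe);        (bits 0-4)
+ *     if (usbif_pipeunlink(req->pipe))            (bit 5 set)
+ *         cancel_urb(port, req->u.unlink.unlink_id);
+ *     else
+ *         submit_urb(port, req);
+ */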
+
+#define USBIF_MAX_SEGMENTS_PER_REQUEST (16)
+
+/*
+ * RING for transferring urbs.
+ */
+struct usbif_request_segment {
+ grant_ref_t gref;
+ uint16_t offset;
+ uint16_t length;
+};
+
+struct usbif_urb_request {
+ uint16_t id; /* request id */
+ uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */
+
+ /* basic urb parameter */
+ uint32_t pipe;
+ uint16_t transfer_flags;
+ uint16_t buffer_length;
+ union {
+ uint8_t ctrl[8]; /* setup_packet (Ctrl) */
+
+ struct {
+ uint16_t interval; /* maximum (1024*8) in usb core */
+ uint16_t start_frame; /* start frame */
+ uint16_t number_of_packets; /* number of ISO packets */
+ uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */
+ } isoc;
+
+ struct {
+ uint16_t interval; /* maximum (1024*8) in usb core */
+ uint16_t pad[3];
+ } intr;
+
+ struct {
+ uint16_t unlink_id; /* unlink request id */
+ uint16_t pad[3];
+ } unlink;
+
+ } u;
+
+ /* urb data segments */
+ struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct usbif_urb_request usbif_urb_request_t;
+
+struct usbif_urb_response {
+ uint16_t id; /* request id */
+ uint16_t start_frame; /* start frame (ISO) */
+ int32_t status; /* status (non-ISO) */
+ int32_t actual_length; /* actual transfer length */
+ int32_t error_count; /* number of ISO errors */
+};
+typedef struct usbif_urb_response usbif_urb_response_t;
+
+DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response);
+#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, PAGE_SIZE)
+
+/*
+ * RING for notifying connect/disconnect events to frontend
+ */
+struct usbif_conn_request {
+ uint16_t id;
+};
+typedef struct usbif_conn_request usbif_conn_request_t;
+
+struct usbif_conn_response {
+ uint16_t id; /* request id */
+ uint8_t portnum; /* port number */
+ uint8_t speed; /* usb_device_speed */
+};
+typedef struct usbif_conn_response usbif_conn_response_t;
+
+DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response);
+#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, PAGE_SIZE)
+
+#endif /* __XEN_PUBLIC_IO_USBIF_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/vscsiif.h
@@ -1,0 +1,117 @@
+/******************************************************************************
+ * vscsiif.h
+ *
+ * Based on the blkif.h code.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright(c) FUJITSU Limited 2008.
+ */
+
+#ifndef __XEN__PUBLIC_IO_SCSI_H__
+#define __XEN__PUBLIC_IO_SCSI_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/* commands between backend and frontend */
+#define VSCSIIF_ACT_SCSI_CDB 1 /* SCSI CDB command */
+#define VSCSIIF_ACT_SCSI_ABORT 2 /* SCSI Device(Lun) Abort */
+#define VSCSIIF_ACT_SCSI_RESET 3 /* SCSI Device(Lun) Reset */
+#define VSCSIIF_ACT_SCSI_SG_PRESET 4 /* Preset SG elements */
+
+/*
+ * Maximum scatter/gather segments per request.
+ *
+ * Balancing the goal of fitting at least 16 "vscsiif_request"
+ * structures on one page (4096 bytes) against the number of
+ * scatter/gather elements needed, 26 was chosen as the magic number.
+ */
+#define VSCSIIF_SG_TABLESIZE 26
+
+/*
+ * based on Linux kernel 2.6.18
+ */
+#define VSCSIIF_MAX_COMMAND_SIZE 16
+#define VSCSIIF_SENSE_BUFFERSIZE 96
+
+struct scsiif_request_segment {
+ grant_ref_t gref;
+ uint16_t offset;
+ uint16_t length;
+};
+typedef struct scsiif_request_segment vscsiif_segment_t;
+
+struct vscsiif_request {
+ uint16_t rqid; /* private guest value, echoed in resp */
+ uint8_t act; /* command between backend and frontend */
+ uint8_t cmd_len;
+
+ uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE];
+ uint16_t timeout_per_command; /* the backend issues the command
+ with twice this timeout value */
+ uint16_t channel, id, lun;
+ uint16_t padding;
+ uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1)
+ DMA_FROM_DEVICE(2)
+ DMA_NONE(3) requests */
+ uint8_t nr_segments; /* Number of pieces of scatter-gather */
+
+ vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE];
+ uint32_t reserved[3];
+};
+typedef struct vscsiif_request vscsiif_request_t;
+
+#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \
+ / sizeof(vscsiif_segment_t))
+
+struct vscsiif_sg_list {
+ /* First two fields must match struct vscsiif_request! */
+ uint16_t rqid; /* private guest value, must match main req */
+ uint8_t act; /* VSCSIIF_ACT_SCSI_SG_PRESET */
+ uint8_t nr_segments; /* Number of pieces of scatter-gather */
+ vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE];
+};
+typedef struct vscsiif_sg_list vscsiif_sg_list_t;
+
+struct vscsiif_response {
+ uint16_t rqid;
+ uint8_t act; /* valid only when backend supports SG_PRESET */
+ uint8_t sense_len;
+ uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
+ int32_t rslt;
+ uint32_t residual_len; /* request bufflen minus the number of
+ bytes returned by the physical device */
+ uint32_t reserved[36];
+};
+typedef struct vscsiif_response vscsiif_response_t;
+
+DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response);
+
+
+#endif /*__XEN__PUBLIC_IO_SCSI_H__*/
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/xenbus.h
@@ -1,0 +1,80 @@
+/*****************************************************************************
+ * xenbus.h
+ *
+ * Xenbus protocol details.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 XenSource Ltd.
+ */
+
+#ifndef _XEN_PUBLIC_IO_XENBUS_H
+#define _XEN_PUBLIC_IO_XENBUS_H
+
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus. States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+ XenbusStateUnknown = 0,
+
+ XenbusStateInitialising = 1,
+
+ /*
+ * InitWait: Finished early initialisation but waiting for information
+ * from the peer or hotplug scripts.
+ */
+ XenbusStateInitWait = 2,
+
+ /*
+ * Initialised: Waiting for a connection from the peer.
+ */
+ XenbusStateInitialised = 3,
+
+ XenbusStateConnected = 4,
+
+ /*
+ * Closing: The device is being closed due to an error or an unplug event.
+ */
+ XenbusStateClosing = 5,
+
+ XenbusStateClosed = 6,
+
+ /*
+ * Reconfiguring: The device is being reconfigured.
+ */
+ XenbusStateReconfiguring = 7,
+
+ XenbusStateReconfigured = 8
+};
+typedef enum xenbus_state XenbusState;
+
+#endif /* _XEN_PUBLIC_IO_XENBUS_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/xs_wire.h
@@ -1,0 +1,138 @@
+/*
+ * Details of the "wire" protocol between Xen Store Daemon and client
+ * library or guest kernel.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ */
+
+#ifndef _XS_WIRE_H
+#define _XS_WIRE_H
+
+enum xsd_sockmsg_type
+{
+ XS_DEBUG,
+ XS_DIRECTORY,
+ XS_READ,
+ XS_GET_PERMS,
+ XS_WATCH,
+ XS_UNWATCH,
+ XS_TRANSACTION_START,
+ XS_TRANSACTION_END,
+ XS_INTRODUCE,
+ XS_RELEASE,
+ XS_GET_DOMAIN_PATH,
+ XS_WRITE,
+ XS_MKDIR,
+ XS_RM,
+ XS_SET_PERMS,
+ XS_WATCH_EVENT,
+ XS_ERROR,
+ XS_IS_DOMAIN_INTRODUCED,
+ XS_RESUME,
+ XS_SET_TARGET,
+ XS_RESTRICT,
+ XS_RESET_WATCHES
+};
+
+#define XS_WRITE_NONE "NONE"
+#define XS_WRITE_CREATE "CREATE"
+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
+
+/* We hand errors as strings, for portability. */
+struct xsd_errors
+{
+ int errnum;
+ const char *errstring;
+};
+#ifdef EINVAL
+#define XSD_ERROR(x) { x, #x }
+/* LINTED: static unused */
+static struct xsd_errors xsd_errors[]
+#if defined(__GNUC__)
+__attribute__((unused))
+#endif
+ = {
+ XSD_ERROR(EINVAL),
+ XSD_ERROR(EACCES),
+ XSD_ERROR(EEXIST),
+ XSD_ERROR(EISDIR),
+ XSD_ERROR(ENOENT),
+ XSD_ERROR(ENOMEM),
+ XSD_ERROR(ENOSPC),
+ XSD_ERROR(EIO),
+ XSD_ERROR(ENOTEMPTY),
+ XSD_ERROR(ENOSYS),
+ XSD_ERROR(EROFS),
+ XSD_ERROR(EBUSY),
+ XSD_ERROR(EAGAIN),
+ XSD_ERROR(EISCONN),
+ XSD_ERROR(E2BIG)
+};
+#endif
+
+struct xsd_sockmsg
+{
+ uint32_t type; /* XS_??? */
+ uint32_t req_id; /* Request identifier, echoed in daemon's response. */
+ uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */
+ uint32_t len; /* Length of data following this. */
+
+ /* Generally followed by nul-terminated string(s). */
+};
+
+enum xs_watch_type
+{
+ XS_WATCH_PATH = 0,
+ XS_WATCH_TOKEN
+};
+
+/*
+ * `incontents 150 xenstore_struct XenStore wire protocol.
+ *
+ * Inter-domain shared memory communications. */
+#define XENSTORE_RING_SIZE 1024
+typedef uint32_t XENSTORE_RING_IDX;
+#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1))
+struct xenstore_domain_interface {
+ char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
+ char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
+ XENSTORE_RING_IDX req_cons, req_prod;
+ XENSTORE_RING_IDX rsp_cons, rsp_prod;
+};
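+
+/*
+ * Illustrative sketch: copying request bytes into the ring. The indices
+ * increase monotonically and wrap naturally; MASK_XENSTORE_IDX is applied
+ * only when indexing the buffer. A write barrier (guest-provided wmb())
+ * must order the data before the producer update.
+ *
+ *     while (len != 0 &&
+ *            intf->req_prod - intf->req_cons < XENSTORE_RING_SIZE) {
+ *         intf->req[MASK_XENSTORE_IDX(intf->req_prod)] = *src++;
+ *         wmb();
+ *         intf->req_prod++;
+ *         len--;
+ *     }
+ *     (then notify the xenstore event channel)
+ */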
+
+/* Violating this is very bad. See docs/misc/xenstore.txt. */
+#define XENSTORE_PAYLOAD_MAX 4096
+
+/* Violating these just gets you an error back */
+#define XENSTORE_ABS_PATH_MAX 3072
+#define XENSTORE_REL_PATH_MAX 2048
+
+#endif /* _XS_WIRE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/kexec.h
@@ -1,0 +1,165 @@
+/******************************************************************************
+ * kexec.h - Public portion
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Xen port written by:
+ * - Simon 'Horms' Horman <[email protected]>
+ * - Magnus Damm <[email protected]>
+ */
+
+#ifndef _XEN_PUBLIC_KEXEC_H
+#define _XEN_PUBLIC_KEXEC_H
+
+
+/* This file describes the Kexec / Kdump hypercall interface for Xen.
+ *
+ * Kexec under vanilla Linux allows a user to reboot the physical machine
+ * into a new user-specified kernel. The Xen port extends this idea
+ * to allow rebooting of the machine from dom0. When kexec for dom0
+ * is used to reboot, both the hypervisor and the domains get replaced
+ * with some other kernel. It is possible to kexec between vanilla
+ * Linux and Xen and back again. Xen to Xen works well too.
+ *
+ * The hypercall interface for kexec can be divided into three main
+ * types of hypercall operations:
+ *
+ * 1) Range information:
+ * This is used by the dom0 kernel to ask the hypervisor about various
+ * address information. This information is needed to allow kexec-tools
+ * to fill in the ELF headers for /proc/vmcore properly.
+ *
+ * 2) Load and unload of images:
+ * There are no big surprises here: the kexec binary from kexec-tools
+ * runs in userspace in dom0. The tool loads data such as the new
+ * kernel, initramfs and hypervisor into the dom0 kernel. Once loaded,
+ * the dom0 kernel performs a load hypercall operation, and before
+ * releasing all page references it calls unload.
+ *
+ * 3) Kexec operation:
+ * This is used to start a previously loaded kernel.
+ */
+
+#include "xen.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+#define KEXEC_XEN_NO_PAGES 17
+#endif
+
+/*
+ * Prototype for this hypercall is:
+ * int kexec_op(int cmd, void *args)
+ * @cmd == KEXEC_CMD_...
+ * KEXEC operation to perform
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Kexec supports two types of operation:
+ * - kexec into a regular kernel, very similar to a standard reboot
+ * - KEXEC_TYPE_DEFAULT is used to specify this type
+ * - kexec into a special "crash kernel", aka kexec-on-panic
+ * - KEXEC_TYPE_CRASH is used to specify this type
+ * - parts of our system may be broken at kexec-on-panic time
+ * - the code should be kept as simple and self-contained as possible
+ */
+
+#define KEXEC_TYPE_DEFAULT 0
+#define KEXEC_TYPE_CRASH 1
+
+
+/* The kexec implementation for Xen allows the user to load two
+ * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH.
+ * All data needed for a kexec reboot is kept in one xen_kexec_image_t
+ * per "instance". The data mainly consists of machine address lists to pages
+ * together with destination addresses. The data in xen_kexec_image_t
+ * is passed to the "code page" which is one page of code that performs
+ * the final relocations before jumping to the new kernel.
+ */
+
+typedef struct xen_kexec_image {
+#if defined(__i386__) || defined(__x86_64__)
+ unsigned long page_list[KEXEC_XEN_NO_PAGES];
+#endif
+ unsigned long indirection_page;
+ unsigned long start_address;
+} xen_kexec_image_t;
+
+/*
+ * Perform kexec having previously loaded a kexec or kdump kernel
+ * as appropriate.
+ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ */
+#define KEXEC_CMD_kexec 0
+typedef struct xen_kexec_exec {
+ int type;
+} xen_kexec_exec_t;
+
+/*
+ * Load/Unload kernel image for kexec or kdump.
+ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ * image == relocation information for kexec (ignored for unload) [in]
+ */
+#define KEXEC_CMD_kexec_load 1
+#define KEXEC_CMD_kexec_unload 2
+typedef struct xen_kexec_load {
+ int type;
+ xen_kexec_image_t image;
+} xen_kexec_load_t;
+
+#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */
+#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */
+#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */
+#define KEXEC_RANGE_MA_XENHEAP 3 /* machine address and size of xenheap
+ * Note that although this is adjacent
+ * to Xen it exists in a separate EFI
+ * region on ia64, and thus needs to be
+ * inserted into iomem_machine separately */
+#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* Obsolete: machine address and size of
+ * the ia64_boot_param */
+#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size
+ * of the EFI Memory Map */
+#define KEXEC_RANGE_MA_VMCOREINFO 6 /* machine address and size of vmcoreinfo */
+
+/*
+ * Find the address and size of certain memory areas
+ * range == KEXEC_RANGE_... [in]
+ * nr == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in]
+ * size == number of bytes reserved in window [out]
+ * start == address of the first byte in the window [out]
+ */
+#define KEXEC_CMD_kexec_get_range 3
+typedef struct xen_kexec_range {
+ int range;
+ int nr;
+ unsigned long size;
+ unsigned long start;
+} xen_kexec_range_t;
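+
+/*
+ * Illustrative sketch (HYPERVISOR_kexec_op is assumed to be the guest's
+ * hypercall stub): querying the crash region from dom0.
+ *
+ *     xen_kexec_range_t r;
+ *     r.range = KEXEC_RANGE_MA_CRASH;
+ *     r.nr = 0;
+ *     if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &r) == 0)
+ *         the crash area is [r.start, r.start + r.size) in machine memory
+ */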
+
+#endif /* _XEN_PUBLIC_KEXEC_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/mem_event.h
@@ -1,0 +1,82 @@
+/******************************************************************************
+ * mem_event.h
+ *
+ * Memory event common structures.
+ *
+ * Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _XEN_PUBLIC_MEM_EVENT_H
+#define _XEN_PUBLIC_MEM_EVENT_H
+
+#include "xen.h"
+#include "io/ring.h"
+
+/* Memory event flags */
+#define MEM_EVENT_FLAG_VCPU_PAUSED (1 << 0)
+#define MEM_EVENT_FLAG_DROP_PAGE (1 << 1)
+#define MEM_EVENT_FLAG_EVICT_FAIL (1 << 2)
+#define MEM_EVENT_FLAG_FOREIGN (1 << 3)
+#define MEM_EVENT_FLAG_DUMMY (1 << 4)
+
+/* Reasons for the memory event request */
+#define MEM_EVENT_REASON_UNKNOWN 0 /* typical reason */
+#define MEM_EVENT_REASON_VIOLATION 1 /* access violation, GFN is address */
+#define MEM_EVENT_REASON_CR0 2 /* CR0 was hit: gfn is CR0 value */
+#define MEM_EVENT_REASON_CR3 3 /* CR3 was hit: gfn is CR3 value */
+#define MEM_EVENT_REASON_CR4 4 /* CR4 was hit: gfn is CR4 value */
+#define MEM_EVENT_REASON_INT3 5 /* int3 was hit: gla/gfn are RIP */
+#define MEM_EVENT_REASON_SINGLESTEP 6 /* single step was invoked: gla/gfn are RIP */
+#define MEM_EVENT_REASON_MSR 7 /* MSR was hit: gfn is MSR value, gla is MSR address;
+ does NOT honour HVMPME_onchangeonly */
+
+typedef struct mem_event_st {
+ uint32_t flags;
+ uint32_t vcpu_id;
+
+ uint64_t gfn;
+ uint64_t offset;
+ uint64_t gla; /* if gla_valid */
+
+ uint32_t p2mt;
+
+ uint16_t access_r:1;
+ uint16_t access_w:1;
+ uint16_t access_x:1;
+ uint16_t gla_valid:1;
+ uint16_t available:12;
+
+ uint16_t reason;
+} mem_event_request_t, mem_event_response_t;
+
+DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t);
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/memory.h
@@ -1,0 +1,472 @@
+/******************************************************************************
+ * memory.h
+ *
+ * Memory reservation and information.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_MEMORY_H__
+#define __XEN_PUBLIC_MEMORY_H__
+
+#include "xen.h"
+
+/*
+ * Increase or decrease the specified domain's memory reservation. Returns the
+ * number of extents successfully allocated or freed.
+ * arg == addr of struct xen_memory_reservation.
+ */
+#define XENMEM_increase_reservation 0
+#define XENMEM_decrease_reservation 1
+#define XENMEM_populate_physmap 6
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+/*
+ * Maximum # bits addressable by the user of the allocated region (e.g., I/O
+ * devices often have a 32-bit limitation even in 64-bit systems). If zero
+ * then the user has no addressing restriction. This field is not used by
+ * XENMEM_decrease_reservation.
+ */
+#define XENMEMF_address_bits(x) (x)
+#define XENMEMF_get_address_bits(x) ((x) & 0xffu)
+/* NUMA node to allocate from. */
+#define XENMEMF_node(x) (((x) + 1) << 8)
+#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
+/* Flag to populate physmap with populate-on-demand entries */
+#define XENMEMF_populate_on_demand (1<<16)
+/* Flag to request allocation only from the node specified */
+#define XENMEMF_exact_node_request (1<<17)
+#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
+#endif
+
+struct xen_memory_reservation {
+
+ /*
+ * XENMEM_increase_reservation:
+ * OUT: MFN (*not* GMFN) bases of extents that were allocated
+ * XENMEM_decrease_reservation:
+ * IN: GMFN bases of extents to free
+ * XENMEM_populate_physmap:
+ * IN: GPFN bases of extents to populate with memory
+ * OUT: GMFN bases of extents that were allocated
+ * (NB. This command also updates the mach_to_phys translation table)
+ * XENMEM_claim_pages:
+ * IN: must be zero
+ */
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+ /* Number of extents, and size/alignment of each (2^extent_order pages). */
+ xen_ulong_t nr_extents;
+ unsigned int extent_order;
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+ /* XENMEMF flags. */
+ unsigned int mem_flags;
+#else
+ unsigned int address_bits;
+#endif
+
+ /*
+ * Domain whose reservation is being changed.
+ * Unprivileged domains can specify only DOMID_SELF.
+ */
+ domid_t domid;
+};
+typedef struct xen_memory_reservation xen_memory_reservation_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
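+
+/*
+ * Illustrative sketch: returning one page to Xen (balloon down), assuming
+ * the usual guest stubs set_xen_guest_handle() and HYPERVISOR_memory_op(),
+ * with gmfn holding the guest machine frame being freed.
+ *
+ *     struct xen_memory_reservation res;
+ *     set_xen_guest_handle(res.extent_start, &gmfn);
+ *     res.nr_extents = 1;
+ *     res.extent_order = 0;               (single page)
+ *     res.mem_flags = 0;                  (interface >= 0x00030209)
+ *     res.domid = DOMID_SELF;
+ *     rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &res);
+ *     (rc is the number of extents actually freed; 1 on success)
+ */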
+
+/*
+ * An atomic exchange of memory pages. If return code is zero then
+ * @out.extent_list provides GMFNs of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code.
+ * On complete success then always @nr_exchanged == @in.nr_extents.
+ * On partial success @nr_exchanged indicates how much work was done.
+ */
+#define XENMEM_exchange 11
+struct xen_memory_exchange {
+ /*
+ * [IN] Details of memory extents to be exchanged (GMFN bases).
+ * Note that @in.address_bits is ignored and unused.
+ */
+ struct xen_memory_reservation in;
+
+ /*
+ * [IN/OUT] Details of new memory extents.
+ * We require that:
+ * 1. @in.domid == @out.domid
+ * 2. @in.nr_extents << @in.extent_order ==
+ * @out.nr_extents << @out.extent_order
+ * 3. @in.extent_start and @out.extent_start lists must not overlap
+ * 4. @out.extent_start lists GPFN bases to be populated
+ * 5. @out.extent_start is overwritten with allocated GMFN bases
+ */
+ struct xen_memory_reservation out;
+
+ /*
+ * [OUT] Number of input extents that were successfully exchanged:
+ * 1. The first @nr_exchanged input extents were successfully
+ * deallocated.
+ * 2. The corresponding first entries in the output extent list correctly
+ * indicate the GMFNs that were successfully exchanged.
+ * 3. All other input and output extents are untouched.
+ * 4. If not all input extents are exchanged then the return code of this
+ * command will be non-zero.
+ * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
+ */
+ xen_ulong_t nr_exchanged;
+};
+typedef struct xen_memory_exchange xen_memory_exchange_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);
+
+/*
+ * Returns the maximum machine frame number of mapped RAM in this system.
+ * This command always succeeds (it never returns an error code).
+ * arg == NULL.
+ */
+#define XENMEM_maximum_ram_page 2
+
+/*
+ * Returns the current or maximum memory reservation, in pages, of the
+ * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
+ * arg == addr of domid_t.
+ */
+#define XENMEM_current_reservation 3
+#define XENMEM_maximum_reservation 4
+
+/*
+ * Returns the maximum GPFN in use by the guest, or -ve errcode on failure.
+ */
+#define XENMEM_maximum_gpfn 14
+
+/*
+ * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table do not implement
+ * this command.
+ * arg == addr of xen_machphys_mfn_list_t.
+ */
+#define XENMEM_machphys_mfn_list 5
+struct xen_machphys_mfn_list {
+ /*
+ * Size of the 'extent_start' array. Fewer entries will be filled if the
+ * machphys table is smaller than max_extents * 2MB.
+ */
+ unsigned int max_extents;
+
+ /*
+ * Pointer to buffer to fill with list of extent starts. If there are
+ * any large discontiguities in the machine address space, 2MB gaps in
+ * the machphys table will be represented by an MFN base of zero.
+ */
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+ /*
+ * Number of extents written to the above array. This will be smaller
+ * than 'max_extents' if the machphys table is smaller than max_extents * 2MB.
+ */
+ unsigned int nr_extents;
+};
+typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
+
+/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping 12
+struct xen_machphys_mapping {
+ xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */
+ xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */
+};
+typedef struct xen_machphys_mapping xen_machphys_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
+
+/* Source mapping space. */
+/* ` enum phys_map_space { */
+#define XENMAPSPACE_shared_info 0 /* shared info page */
+#define XENMAPSPACE_grant_table 1 /* grant table page */
+#define XENMAPSPACE_gmfn 2 /* GMFN */
+#define XENMAPSPACE_gmfn_range 3 /* GMFN range, XENMEM_add_to_physmap only. */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, XENMEM_add_to_physmap_range only. */
+/* ` } */
+
+/*
+ * Sets the GPFN at which a particular page appears in the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_add_to_physmap_t.
+ */
+#define XENMEM_add_to_physmap 7
+struct xen_add_to_physmap {
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+
+ /* Number of pages to go through for gmfn_range */
+ uint16_t size;
+
+ unsigned int space; /* => enum phys_map_space */
+
+#define XENMAPIDX_grant_table_status 0x80000000
+
+ /* Index into space being mapped. */
+ xen_ulong_t idx;
+
+ /* GPFN in domid where the source mapping page should appear. */
+ xen_pfn_t gpfn;
+};
+typedef struct xen_add_to_physmap xen_add_to_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
+
+/* A batched version of add_to_physmap. */
+#define XENMEM_add_to_physmap_range 23
+struct xen_add_to_physmap_range {
+ /* IN */
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+ uint16_t space; /* => enum phys_map_space */
+
+ /* Number of pages to go through */
+ uint16_t size;
+ domid_t foreign_domid; /* IFF gmfn_foreign */
+
+ /* Indexes into space being mapped. */
+ XEN_GUEST_HANDLE(xen_ulong_t) idxs;
+
+ /* GPFN in domid where the source mapping page should appear. */
+ XEN_GUEST_HANDLE(xen_pfn_t) gpfns;
+
+ /* OUT */
+
+ /* Per index error code. */
+ XEN_GUEST_HANDLE(int) errs;
+};
+typedef struct xen_add_to_physmap_range xen_add_to_physmap_range_t;
+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_range_t);
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap 15
+struct xen_remove_from_physmap {
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+
+ /* GPFN of the current mapping of the page. */
+ xen_pfn_t gpfn;
+};
+typedef struct xen_remove_from_physmap xen_remove_from_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t);
+
+/*** REMOVED ***/
+/*#define XENMEM_translate_gpfn_list 8*/
+
+/*
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started (specified by XENMEM_set_memory_map).
+ * arg == addr of xen_memory_map_t.
+ */
+#define XENMEM_memory_map 9
+struct xen_memory_map {
+ /*
+ * On call, the number of entries that can be stored in the buffer.
+ * On return, the number of entries that have been stored in the
+ * buffer.
+ */
+ unsigned int nr_entries;
+
+ /*
+ * Entries in the buffer are in the same format as returned by the
+ * BIOS INT 0x15 EAX=0xE820 call.
+ */
+ XEN_GUEST_HANDLE(void) buffer;
+};
+typedef struct xen_memory_map xen_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ * arg == addr of xen_memory_map_t.
+ */
+#define XENMEM_machine_memory_map 10
+
+/*
+ * Set the pseudo-physical memory map of a domain, as returned by
+ * XENMEM_memory_map.
+ * arg == addr of xen_foreign_memory_map_t.
+ */
+#define XENMEM_set_memory_map 13
+struct xen_foreign_memory_map {
+ domid_t domid;
+ struct xen_memory_map map;
+};
+typedef struct xen_foreign_memory_map xen_foreign_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t);
+
+#define XENMEM_set_pod_target 16
+#define XENMEM_get_pod_target 17
+struct xen_pod_target {
+ /* IN */
+ uint64_t target_pages;
+ /* OUT */
+ uint64_t tot_pages;
+ uint64_t pod_cache_pages;
+ uint64_t pod_entries;
+ /* IN */
+ domid_t domid;
+};
+typedef struct xen_pod_target xen_pod_target_t;
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+#ifndef uint64_aligned_t
+#define uint64_aligned_t uint64_t
+#endif
+
+/*
+ * Get the number of MFNs saved through memory sharing.
+ * The call never fails.
+ */
+#define XENMEM_get_sharing_freed_pages 18
+#define XENMEM_get_sharing_shared_pages 19
+
+#define XENMEM_paging_op 20
+#define XENMEM_paging_op_nominate 0
+#define XENMEM_paging_op_evict 1
+#define XENMEM_paging_op_prep 2
+
+#define XENMEM_access_op 21
+#define XENMEM_access_op_resume 0
+
+struct xen_mem_event_op {
+ uint8_t op; /* XENMEM_*_op_* */
+ domid_t domain;
+
+
+ uint64_aligned_t buffer;
+ /* Other OPs */
+ uint64_aligned_t gfn; /* IN: gfn of page being operated on */
+};
+typedef struct xen_mem_event_op xen_mem_event_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t);
+
+#define XENMEM_sharing_op 22
+#define XENMEM_sharing_op_nominate_gfn 0
+#define XENMEM_sharing_op_nominate_gref 1
+#define XENMEM_sharing_op_share 2
+#define XENMEM_sharing_op_resume 3
+#define XENMEM_sharing_op_debug_gfn 4
+#define XENMEM_sharing_op_debug_mfn 5
+#define XENMEM_sharing_op_debug_gref 6
+#define XENMEM_sharing_op_add_physmap 7
+#define XENMEM_sharing_op_audit 8
+
+#define XENMEM_SHARING_OP_S_HANDLE_INVALID (-10)
+#define XENMEM_SHARING_OP_C_HANDLE_INVALID (-9)
+
+/* The following allows sharing of grant refs. This is useful
+ * for sharing utilities sitting as "filters" in IO backends
+ * (e.g. memshr + blktap(2)). The IO backend is only exposed
+ * to grant references, and this allows sharing of the grefs */
+#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG (1ULL << 62)
+
+#define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val) \
+ (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val)
+#define XENMEM_SHARING_OP_FIELD_IS_GREF(field) \
+ ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG)
+#define XENMEM_SHARING_OP_FIELD_GET_GREF(field) \
+ ((field) & (~XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG))
+
+struct xen_mem_sharing_op {
+ uint8_t op; /* XENMEM_sharing_op_* */
+ domid_t domain;
+
+ union {
+ struct mem_sharing_op_nominate { /* OP_NOMINATE_xxx */
+ union {
+ uint64_aligned_t gfn; /* IN: gfn to nominate */
+ uint32_t grant_ref; /* IN: grant ref to nominate */
+ } u;
+ uint64_aligned_t handle; /* OUT: the handle */
+ } nominate;
+ struct mem_sharing_op_share { /* OP_SHARE/ADD_PHYSMAP */
+ uint64_aligned_t source_gfn; /* IN: the gfn of the source page */
+ uint64_aligned_t source_handle; /* IN: handle to the source page */
+ uint64_aligned_t client_gfn; /* IN: the client gfn */
+ uint64_aligned_t client_handle; /* IN: handle to the client page */
+ domid_t client_domain; /* IN: the client domain id */
+ } share;
+ struct mem_sharing_op_debug { /* OP_DEBUG_xxx */
+ union {
+ uint64_aligned_t gfn; /* IN: gfn to debug */
+ uint64_aligned_t mfn; /* IN: mfn to debug */
+ uint32_t gref; /* IN: gref to debug */
+ } u;
+ } debug;
+ } u;
+};
+typedef struct xen_mem_sharing_op xen_mem_sharing_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
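+
+/*
+ * Illustrative sketch: encoding a grant reference in a sharing-op field
+ * and decoding it again with the macros above.
+ *
+ *     XENMEM_SHARING_OP_FIELD_MAKE_GREF(op.u.nominate.u.gfn, gref);
+ *     ...
+ *     if (XENMEM_SHARING_OP_FIELD_IS_GREF(field))
+ *         gref = XENMEM_SHARING_OP_FIELD_GET_GREF(field);
+ */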
+
+/*
+ * Attempt to stake a claim for a domain on a quantity of pages
+ * of system RAM, but _not_ assign specific pageframes. Only
+ * arithmetic is performed so the hypercall is very fast and need
+ * not be preemptible, thus sidestepping time-of-check-time-of-use
+ * races for memory allocation. Returns 0 if the hypervisor page
+ * allocator has atomically and successfully claimed the requested
+ * number of pages, else non-zero.
+ *
+ * Any domain may have only one active claim. When sufficient memory
+ * has been allocated to resolve the claim, the claim silently expires.
+ * Claiming zero pages effectively resets any outstanding claim and
+ * is always successful.
+ *
+ * Note that a valid claim may be staked even after memory has been
+ * allocated for a domain. In this case, the claim is not incremental,
+ * i.e. if the domain's tot_pages is 3, and a claim is staked for 10,
+ * only 7 additional pages are claimed.
+ *
+ * Caller must be privileged or the hypercall fails.
+ */
+#define XENMEM_claim_pages 24
+
+/*
+ * XENMEM_claim_pages flags - there are no flags at this time.
+ * The zero value is appropriate.
+ */
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#endif /* __XEN_PUBLIC_MEMORY_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/nmi.h
@@ -1,0 +1,85 @@
+/******************************************************************************
+ * nmi.h
+ *
+ * NMI callback registration and reason codes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_NMI_H__
+#define __XEN_PUBLIC_NMI_H__
+
+#include "xen.h"
+
+/*
+ * NMI reason codes:
+ * Currently these are x86-specific, stored in arch_shared_info.nmi_reason.
+ */
+ /* I/O-check error reported via ISA port 0x61, bit 6. */
+#define _XEN_NMIREASON_io_error 0
+#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error)
+ /* PCI SERR reported via ISA port 0x61, bit 7. */
+#define _XEN_NMIREASON_pci_serr 1
+#define XEN_NMIREASON_pci_serr (1UL << _XEN_NMIREASON_pci_serr)
+#if __XEN_INTERFACE_VERSION__ < 0x00040300 /* legacy alias of the above */
+ /* Parity error reported via ISA port 0x61, bit 7. */
+#define _XEN_NMIREASON_parity_error 1
+#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error)
+#endif
+ /* Unknown hardware-generated NMI. */
+#define _XEN_NMIREASON_unknown 2
+#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown)
+
+/*
+ * long nmi_op(unsigned int cmd, void *arg)
+ * NB. All ops return zero on success, else a negative error code.
+ */
+
+/*
+ * Register NMI callback for this (calling) VCPU. Currently this only makes
+ * sense for domain 0, vcpu 0. All other callers will be returned EINVAL.
+ * arg == pointer to xennmi_callback structure.
+ */
+#define XENNMI_register_callback 0
+struct xennmi_callback {
+ unsigned long handler_address;
+ unsigned long pad;
+};
+typedef struct xennmi_callback xennmi_callback_t;
+DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t);
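+
+/*
+ * Illustrative sketch (HYPERVISOR_nmi_op is assumed to be the guest's
+ * hypercall stub): dom0 vcpu0 registering an NMI handler entry point.
+ *
+ *     struct xennmi_callback cb;
+ *     cb.handler_address = (unsigned long)nmi_entry;
+ *     cb.pad = 0;
+ *     rc = HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);
+ */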
+
+/*
+ * Deregister NMI callback for this (calling) VCPU.
+ * arg == NULL.
+ */
+#define XENNMI_unregister_callback 1
+
+#endif /* __XEN_PUBLIC_NMI_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/physdev.h
@@ -1,0 +1,376 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_PHYSDEV_H__
+#define __XEN_PUBLIC_PHYSDEV_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ * int physdev_op(int cmd, void *args)
+ * @cmd == PHYSDEVOP_??? (physdev operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Notify end-of-interrupt (EOI) for the specified IRQ.
+ * @arg == pointer to physdev_eoi structure.
+ */
+#define PHYSDEVOP_eoi 12
+struct physdev_eoi {
+ /* IN */
+ uint32_t irq;
+};
+typedef struct physdev_eoi physdev_eoi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
+
+/*
+ * Register a shared page for the hypervisor to indicate whether the guest
+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
+ * once the guest has used this function, in that the associated event
+ * channel is automatically unmasked. The page registered is used as a bit
+ * array indexed by Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_gmfn_v1 17
+/*
+ * Register a shared page for the hypervisor to indicate whether the
+ * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to
+ * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of
+ * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by
+ * Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_gmfn_v2 28
+struct physdev_pirq_eoi_gmfn {
+ /* IN */
+ xen_pfn_t gmfn;
+};
+typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t);
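+
+/*
+ * Illustrative sketch: registering the shared EOI bitmap page, assuming
+ * guest helpers virt_to_gmfn() and HYPERVISOR_physdev_op() and a
+ * page-aligned, zeroed bitmap_page.
+ *
+ *     struct physdev_pirq_eoi_gmfn arg;
+ *     arg.gmfn = virt_to_gmfn(bitmap_page);
+ *     if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &arg) == 0)
+ *         testing bit pirq in bitmap_page now says whether EOI is needed
+ */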
+
+/*
+ * Query the status of an IRQ line.
+ * @arg == pointer to physdev_irq_status_query structure.
+ */
+#define PHYSDEVOP_irq_status_query 5
+struct physdev_irq_status_query {
+ /* IN */
+ uint32_t irq;
+ /* OUT */
+ uint32_t flags; /* XENIRQSTAT_* */
+};
+typedef struct physdev_irq_status_query physdev_irq_status_query_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t);
+
+/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
+#define _XENIRQSTAT_needs_eoi (0)
+#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi)
+
+/* IRQ shared by multiple guests? */
+#define _XENIRQSTAT_shared (1)
+#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared)
+
+/*
+ * Set the current VCPU's I/O privilege level.
+ * @arg == pointer to physdev_set_iopl structure.
+ */
+#define PHYSDEVOP_set_iopl 6
+struct physdev_set_iopl {
+ /* IN */
+ uint32_t iopl;
+};
+typedef struct physdev_set_iopl physdev_set_iopl_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t);
+
+/*
+ * Set the current VCPU's I/O-port permissions bitmap.
+ * @arg == pointer to physdev_set_iobitmap structure.
+ */
+#define PHYSDEVOP_set_iobitmap 7
+struct physdev_set_iobitmap {
+ /* IN */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+ XEN_GUEST_HANDLE(uint8) bitmap;
+#else
+ uint8_t *bitmap;
+#endif
+ uint32_t nr_ports;
+};
+typedef struct physdev_set_iobitmap physdev_set_iobitmap_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t);
+
+/*
+ * Read or write an IO-APIC register.
+ * @arg == pointer to physdev_apic structure.
+ */
+#define PHYSDEVOP_apic_read 8
+#define PHYSDEVOP_apic_write 9
+struct physdev_apic {
+ /* IN */
+ unsigned long apic_physbase;
+ uint32_t reg;
+ /* IN or OUT */
+ uint32_t value;
+};
+typedef struct physdev_apic physdev_apic_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
+
+/*
+ * Allocate or free a physical upcall vector for the specified IRQ line.
+ * @arg == pointer to physdev_irq structure.
+ */
+#define PHYSDEVOP_alloc_irq_vector 10
+#define PHYSDEVOP_free_irq_vector 11
+struct physdev_irq {
+ /* IN */
+ uint32_t irq;
+ /* IN or OUT */
+ uint32_t vector;
+};
+typedef struct physdev_irq physdev_irq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
+
+#define MAP_PIRQ_TYPE_MSI 0x0
+#define MAP_PIRQ_TYPE_GSI 0x1
+#define MAP_PIRQ_TYPE_UNKNOWN 0x2
+#define MAP_PIRQ_TYPE_MSI_SEG 0x3
+
+#define PHYSDEVOP_map_pirq 13
+struct physdev_map_pirq {
+ domid_t domid;
+ /* IN */
+ int type;
+ /* IN */
+ int index;
+ /* IN or OUT */
+ int pirq;
+ /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
+ int bus;
+ /* IN */
+ int devfn;
+ /* IN */
+ int entry_nr;
+ /* IN */
+ uint64_t table_base;
+};
+typedef struct physdev_map_pirq physdev_map_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t);
+
+#define PHYSDEVOP_unmap_pirq 14
+struct physdev_unmap_pirq {
+ domid_t domid;
+ /* IN */
+ int pirq;
+};
+
+typedef struct physdev_unmap_pirq physdev_unmap_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t);
+
+#define PHYSDEVOP_manage_pci_add 15
+#define PHYSDEVOP_manage_pci_remove 16
+struct physdev_manage_pci {
+ /* IN */
+ uint8_t bus;
+ uint8_t devfn;
+};
+
+typedef struct physdev_manage_pci physdev_manage_pci_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t);
+
+#define PHYSDEVOP_restore_msi 19
+struct physdev_restore_msi {
+ /* IN */
+ uint8_t bus;
+ uint8_t devfn;
+};
+typedef struct physdev_restore_msi physdev_restore_msi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t);
+
+#define PHYSDEVOP_manage_pci_add_ext 20
+struct physdev_manage_pci_ext {
+ /* IN */
+ uint8_t bus;
+ uint8_t devfn;
+ unsigned is_extfn;
+ unsigned is_virtfn;
+ struct {
+ uint8_t bus;
+ uint8_t devfn;
+ } physfn;
+};
+
+typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t);
+
+/*
+ * Argument to physdev_op_compat() hypercall. Superseded by the new physdev_op()
+ * hypercall since 0x00030202.
+ */
+struct physdev_op {
+ uint32_t cmd;
+ union {
+ struct physdev_irq_status_query irq_status_query;
+ struct physdev_set_iopl set_iopl;
+ struct physdev_set_iobitmap set_iobitmap;
+ struct physdev_apic apic_op;
+ struct physdev_irq irq_op;
+ } u;
+};
+typedef struct physdev_op physdev_op_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_op_t);
+
+#define PHYSDEVOP_setup_gsi 21
+struct physdev_setup_gsi {
+ /* IN */
+ int gsi;
+ /* IN */
+ uint8_t triggering;
+ /* IN */
+ uint8_t polarity;
+};
+
+typedef struct physdev_setup_gsi physdev_setup_gsi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t);
+
+/* leave PHYSDEVOP 22 free */
+
+/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI
+ * the hypercall returns a free pirq */
+#define PHYSDEVOP_get_free_pirq 23
+struct physdev_get_free_pirq {
+ /* IN */
+ int type;
+ /* OUT */
+ uint32_t pirq;
+};
+
+typedef struct physdev_get_free_pirq physdev_get_free_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t);
+
+#define XEN_PCI_MMCFG_RESERVED 0x1
+
+#define PHYSDEVOP_pci_mmcfg_reserved 24
+struct physdev_pci_mmcfg_reserved {
+ uint64_t address;
+ uint16_t segment;
+ uint8_t start_bus;
+ uint8_t end_bus;
+ uint32_t flags;
+};
+typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t);
+
+#define XEN_PCI_DEV_EXTFN 0x1
+#define XEN_PCI_DEV_VIRTFN 0x2
+#define XEN_PCI_DEV_PXM 0x4
+
+#define PHYSDEVOP_pci_device_add 25
+struct physdev_pci_device_add {
+ /* IN */
+ uint16_t seg;
+ uint8_t bus;
+ uint8_t devfn;
+ uint32_t flags;
+ struct {
+ uint8_t bus;
+ uint8_t devfn;
+ } physfn;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ uint32_t optarr[];
+#elif defined(__GNUC__)
+ uint32_t optarr[0];
+#endif
+};
+typedef struct physdev_pci_device_add physdev_pci_device_add_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t);
+
+#define PHYSDEVOP_pci_device_remove 26
+#define PHYSDEVOP_restore_msi_ext 27
+/*
+ * Dom0 should use these two to announce that MMIO resources assigned to
+ * MSI-X capable devices won't (prepare) or may (release) change.
+ */
+#define PHYSDEVOP_prepare_msix 30
+#define PHYSDEVOP_release_msix 31
+struct physdev_pci_device {
+ /* IN */
+ uint16_t seg;
+ uint8_t bus;
+ uint8_t devfn;
+};
+typedef struct physdev_pci_device physdev_pci_device_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t);
+
+#define PHYSDEVOP_DBGP_RESET_PREPARE 1
+#define PHYSDEVOP_DBGP_RESET_DONE 2
+
+#define PHYSDEVOP_DBGP_BUS_UNKNOWN 0
+#define PHYSDEVOP_DBGP_BUS_PCI 1
+
+#define PHYSDEVOP_dbgp_op 29
+struct physdev_dbgp_op {
+ /* IN */
+ uint8_t op;
+ uint8_t bus;
+ union {
+ struct physdev_pci_device pci;
+ } u;
+};
+typedef struct physdev_dbgp_op physdev_dbgp_op_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t);
+
+/*
+ * Notify that some PIRQ-bound event channels have been unmasked.
+ * ** This command is obsolete since interface version 0x00030202 and is **
+ * ** unsupported by newer versions of Xen. **
+ */
+#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4
+
+/*
+ * These all-capitals physdev operation names are superseded by the new names
+ * (defined above) since interface version 0x00030202.
+ */
+#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query
+#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl
+#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap
+#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read
+#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write
+#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector
+#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector
+#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi
+#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared
+
+#if __XEN_INTERFACE_VERSION__ < 0x00040200
+#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v1
+#else
+#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v2
+#endif
+
+#endif /* __XEN_PUBLIC_PHYSDEV_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/platform.h
@@ -1,0 +1,572 @@
+/******************************************************************************
+ * platform.h
+ *
+ * Hardware platform operations. Intended for use by domain-0 kernel.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_PLATFORM_H__
+#define __XEN_PUBLIC_PLATFORM_H__
+
+#include "xen.h"
+
+#define XENPF_INTERFACE_VERSION 0x03000001
+
+/*
+ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
+ * 1 January, 1970 if the current system time was <system_time>.
+ */
+#define XENPF_settime 17
+struct xenpf_settime {
+ /* IN variables. */
+ uint32_t secs;
+ uint32_t nsecs;
+ uint64_t system_time;
+};
+typedef struct xenpf_settime xenpf_settime_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t);
+
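+/*
+ * Illustrative sketch: dom0 could set the hypervisor wallclock via the
+ * HYPERVISOR_platform_op() hypercall declared at the end of this
+ * header (the time values here are placeholders):
+ *
+ *     struct xen_platform_op op;
+ *     op.cmd = XENPF_settime;
+ *     op.interface_version = XENPF_INTERFACE_VERSION;
+ *     op.u.settime.secs = secs;
+ *     op.u.settime.nsecs = nsecs;
+ *     op.u.settime.system_time = now;
+ *     HYPERVISOR_platform_op(&op);
+ */
+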
+/*
+ * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
+ * On x86, @type is an architecture-defined MTRR memory type.
+ * On success, returns the MTRR that was used (@reg) and a handle that can
+ * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting.
+ * (x86-specific).
+ */
+#define XENPF_add_memtype 31
+struct xenpf_add_memtype {
+ /* IN variables. */
+ xen_pfn_t mfn;
+ uint64_t nr_mfns;
+ uint32_t type;
+ /* OUT variables. */
+ uint32_t handle;
+ uint32_t reg;
+};
+typedef struct xenpf_add_memtype xenpf_add_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t);
+
+/*
+ * Tear down an existing memory-range type. If @handle is remembered then it
+ * should be passed in to accurately tear down the correct setting (in case
+ * of overlapping memory regions with differing types). If it is not known
+ * then @handle should be set to zero. In all cases @reg must be set.
+ * (x86-specific).
+ */
+#define XENPF_del_memtype 32
+struct xenpf_del_memtype {
+ /* IN variables. */
+ uint32_t handle;
+ uint32_t reg;
+};
+typedef struct xenpf_del_memtype xenpf_del_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t);
+
+/* Read current type of an MTRR (x86-specific). */
+#define XENPF_read_memtype 33
+struct xenpf_read_memtype {
+ /* IN variables. */
+ uint32_t reg;
+ /* OUT variables. */
+ xen_pfn_t mfn;
+ uint64_t nr_mfns;
+ uint32_t type;
+};
+typedef struct xenpf_read_memtype xenpf_read_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t);
+
+#define XENPF_microcode_update 35
+struct xenpf_microcode_update {
+ /* IN variables. */
+ XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */
+ uint32_t length; /* Length of microcode data. */
+};
+typedef struct xenpf_microcode_update xenpf_microcode_update_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t);
+
+#define XENPF_platform_quirk 39
+#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */
+#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */
+#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */
+struct xenpf_platform_quirk {
+ /* IN variables. */
+ uint32_t quirk_id;
+};
+typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t);
+
+#define XENPF_efi_runtime_call 49
+#define XEN_EFI_get_time 1
+#define XEN_EFI_set_time 2
+#define XEN_EFI_get_wakeup_time 3
+#define XEN_EFI_set_wakeup_time 4
+#define XEN_EFI_get_next_high_monotonic_count 5
+#define XEN_EFI_get_variable 6
+#define XEN_EFI_set_variable 7
+#define XEN_EFI_get_next_variable_name 8
+#define XEN_EFI_query_variable_info 9
+#define XEN_EFI_query_capsule_capabilities 10
+#define XEN_EFI_update_capsule 11
+struct xenpf_efi_runtime_call {
+ uint32_t function;
+ /*
+ * This field is generally used for per sub-function flags (defined
+ * below), except for the XEN_EFI_get_next_high_monotonic_count case,
+ * where it holds the single returned value.
+ */
+ uint32_t misc;
+ unsigned long status;
+ union {
+#define XEN_EFI_GET_TIME_SET_CLEARS_NS 0x00000001
+ struct {
+ struct xenpf_efi_time {
+ uint16_t year;
+ uint8_t month;
+ uint8_t day;
+ uint8_t hour;
+ uint8_t min;
+ uint8_t sec;
+ uint32_t ns;
+ int16_t tz;
+ uint8_t daylight;
+ } time;
+ uint32_t resolution;
+ uint32_t accuracy;
+ } get_time;
+
+ struct xenpf_efi_time set_time;
+
+#define XEN_EFI_GET_WAKEUP_TIME_ENABLED 0x00000001
+#define XEN_EFI_GET_WAKEUP_TIME_PENDING 0x00000002
+ struct xenpf_efi_time get_wakeup_time;
+
+#define XEN_EFI_SET_WAKEUP_TIME_ENABLE 0x00000001
+#define XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY 0x00000002
+ struct xenpf_efi_time set_wakeup_time;
+
+#define XEN_EFI_VARIABLE_NON_VOLATILE 0x00000001
+#define XEN_EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002
+#define XEN_EFI_VARIABLE_RUNTIME_ACCESS 0x00000004
+ struct {
+ XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */
+ unsigned long size;
+ XEN_GUEST_HANDLE(void) data;
+ struct xenpf_efi_guid {
+ uint32_t data1;
+ uint16_t data2;
+ uint16_t data3;
+ uint8_t data4[8];
+ } vendor_guid;
+ } get_variable, set_variable;
+
+ struct {
+ unsigned long size;
+ XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */
+ struct xenpf_efi_guid vendor_guid;
+ } get_next_variable_name;
+
+#define XEN_EFI_VARINFO_BOOT_SNAPSHOT 0x00000001
+ struct {
+ uint32_t attr;
+ uint64_t max_store_size;
+ uint64_t remain_store_size;
+ uint64_t max_size;
+ } query_variable_info;
+
+ struct {
+ XEN_GUEST_HANDLE(void) capsule_header_array;
+ unsigned long capsule_count;
+ uint64_t max_capsule_size;
+ unsigned int reset_type;
+ } query_capsule_capabilities;
+
+ struct {
+ XEN_GUEST_HANDLE(void) capsule_header_array;
+ unsigned long capsule_count;
+ uint64_t sg_list; /* machine address */
+ } update_capsule;
+ } u;
+};
+typedef struct xenpf_efi_runtime_call xenpf_efi_runtime_call_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_efi_runtime_call_t);
+
+#define XENPF_firmware_info 50
+#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */
+#define XEN_FW_EFI_INFO 4 /* from EFI */
+#define XEN_FW_EFI_VERSION 0
+#define XEN_FW_EFI_CONFIG_TABLE 1
+#define XEN_FW_EFI_VENDOR 2
+#define XEN_FW_EFI_MEM_INFO 3
+#define XEN_FW_EFI_RT_VERSION 4
+#define XEN_FW_EFI_PCI_ROM 5
+#define XEN_FW_KBD_SHIFT_FLAGS 5
+struct xenpf_firmware_info {
+ /* IN variables. */
+ uint32_t type;
+ uint32_t index;
+ /* OUT variables. */
+ union {
+ struct {
+ /* Int13, Fn48: Check Extensions Present. */
+ uint8_t device; /* %dl: bios device number */
+ uint8_t version; /* %ah: major version */
+ uint16_t interface_support; /* %cx: support bitmap */
+ /* Int13, Fn08: Legacy Get Device Parameters. */
+ uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */
+ uint8_t legacy_max_head; /* %dh: max head # */
+ uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */
+ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+ /* NB. First uint16_t of buffer must be set to buffer size. */
+ XEN_GUEST_HANDLE(void) edd_params;
+ } disk_info; /* XEN_FW_DISK_INFO */
+ struct {
+ uint8_t device; /* bios device number */
+ uint32_t mbr_signature; /* offset 0x1b8 in mbr */
+ } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+ struct {
+ /* Int10, AX=4F15: Get EDID info. */
+ uint8_t capabilities;
+ uint8_t edid_transfer_time;
+ /* must refer to 128-byte buffer */
+ XEN_GUEST_HANDLE(uint8) edid;
+ } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+ union xenpf_efi_info {
+ uint32_t version;
+ struct {
+ uint64_t addr; /* EFI_CONFIGURATION_TABLE */
+ uint32_t nent;
+ } cfg;
+ struct {
+ uint32_t revision;
+ uint32_t bufsz; /* input, in bytes */
+ XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */
+ } vendor;
+ struct {
+ uint64_t addr;
+ uint64_t size;
+ uint64_t attr;
+ uint32_t type;
+ } mem;
+ struct {
+ /* IN variables */
+ uint16_t segment;
+ uint8_t bus;
+ uint8_t devfn;
+ uint16_t vendor;
+ uint16_t devid;
+ /* OUT variables */
+ uint64_t address;
+ xen_ulong_t size;
+ } pci_rom;
+ } efi_info; /* XEN_FW_EFI_INFO */
+
+ /* Int16, Fn02: Get keyboard shift flags. */
+ uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */
+ } u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t);
+
+#define XENPF_enter_acpi_sleep 51
+struct xenpf_enter_acpi_sleep {
+ /* IN variables */
+#if __XEN_INTERFACE_VERSION__ < 0x00040300
+ uint16_t pm1a_cnt_val; /* PM1a control value. */
+ uint16_t pm1b_cnt_val; /* PM1b control value. */
+#else
+ uint16_t val_a; /* PM1a control / sleep type A. */
+ uint16_t val_b; /* PM1b control / sleep type B. */
+#endif
+ uint32_t sleep_state; /* Which state to enter (Sn). */
+#define XENPF_ACPI_SLEEP_EXTENDED 0x00000001
+ uint32_t flags; /* XENPF_ACPI_SLEEP_*. */
+};
+typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t);
+
+#define XENPF_change_freq 52
+struct xenpf_change_freq {
+ /* IN variables */
+ uint32_t flags; /* Must be zero. */
+ uint32_t cpu; /* Physical cpu. */
+ uint64_t freq; /* New frequency (Hz). */
+};
+typedef struct xenpf_change_freq xenpf_change_freq_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t);
+
+/*
+ * Get idle times (nanoseconds since boot) for physical CPUs specified in the
+ * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is
+ * indexed by CPU number; only entries with the corresponding @cpumap_bitmap
+ * bit set are written to. On return, @cpumap_bitmap is modified so that any
+ * non-existent CPUs are cleared. Such CPUs have their @idletime array entry
+ * cleared.
+ */
+#define XENPF_getidletime 53
+struct xenpf_getidletime {
+ /* IN/OUT variables */
+ /* IN: CPUs to interrogate; OUT: subset of IN which are present */
+ XEN_GUEST_HANDLE(uint8) cpumap_bitmap;
+ /* IN variables */
+ /* Size of cpumap bitmap. */
+ uint32_t cpumap_nr_cpus;
+ /* Must be indexable for every cpu in cpumap_bitmap. */
+ XEN_GUEST_HANDLE(uint64) idletime;
+ /* OUT variables */
+ /* System time when the idletime snapshots were taken. */
+ uint64_t now;
+};
+typedef struct xenpf_getidletime xenpf_getidletime_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
+
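+/*
+ * Illustrative sketch, assuming the usual set_xen_guest_handle()
+ * helper from the arch headers; NCPU is a placeholder and the caller
+ * sets all bits in map[] to query every cpu:
+ *
+ *     uint8_t map[(NCPU+7)/8];
+ *     uint64_t idle[NCPU];
+ *     struct xen_platform_op op;
+ *     op.cmd = XENPF_getidletime;
+ *     op.interface_version = XENPF_INTERFACE_VERSION;
+ *     set_xen_guest_handle(op.u.getidletime.cpumap_bitmap, map);
+ *     op.u.getidletime.cpumap_nr_cpus = NCPU;
+ *     set_xen_guest_handle(op.u.getidletime.idletime, idle);
+ *     HYPERVISOR_platform_op(&op);
+ */
+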
+#define XENPF_set_processor_pminfo 54
+
+/* ability bits */
+#define XEN_PROCESSOR_PM_CX 1
+#define XEN_PROCESSOR_PM_PX 2
+#define XEN_PROCESSOR_PM_TX 4
+
+/* cmd type */
+#define XEN_PM_CX 0
+#define XEN_PM_PX 1
+#define XEN_PM_TX 2
+#define XEN_PM_PDC 3
+
+/* Px sub info type */
+#define XEN_PX_PCT 1
+#define XEN_PX_PSS 2
+#define XEN_PX_PPC 4
+#define XEN_PX_PSD 8
+
+struct xen_power_register {
+ uint32_t space_id;
+ uint32_t bit_width;
+ uint32_t bit_offset;
+ uint32_t access_size;
+ uint64_t address;
+};
+
+struct xen_processor_csd {
+ uint32_t domain; /* domain number of one dependent group */
+ uint32_t coord_type; /* coordination type */
+ uint32_t num; /* number of processors in same domain */
+};
+typedef struct xen_processor_csd xen_processor_csd_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t);
+
+struct xen_processor_cx {
+ struct xen_power_register reg; /* GAS for Cx trigger register */
+ uint8_t type; /* cstate value, c0: 0, c1: 1, ... */
+ uint32_t latency; /* worst latency (ms) to enter/exit this cstate */
+ uint32_t power; /* average power consumption (mW) */
+ uint32_t dpcnt; /* number of dependency entries */
+ XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */
+};
+typedef struct xen_processor_cx xen_processor_cx_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t);
+
+struct xen_processor_flags {
+ uint32_t bm_control:1;
+ uint32_t bm_check:1;
+ uint32_t has_cst:1;
+ uint32_t power_setup_done:1;
+ uint32_t bm_rld_set:1;
+};
+
+struct xen_processor_power {
+ uint32_t count; /* number of C state entries in array below */
+ struct xen_processor_flags flags; /* global flags of this processor */
+ XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */
+};
+
+struct xen_pct_register {
+ uint8_t descriptor;
+ uint16_t length;
+ uint8_t space_id;
+ uint8_t bit_width;
+ uint8_t bit_offset;
+ uint8_t reserved;
+ uint64_t address;
+};
+
+struct xen_processor_px {
+ uint64_t core_frequency; /* megahertz */
+ uint64_t power; /* milliWatts */
+ uint64_t transition_latency; /* microseconds */
+ uint64_t bus_master_latency; /* microseconds */
+ uint64_t control; /* control value */
+ uint64_t status; /* success indicator */
+};
+typedef struct xen_processor_px xen_processor_px_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t);
+
+struct xen_psd_package {
+ uint64_t num_entries;
+ uint64_t revision;
+ uint64_t domain;
+ uint64_t coord_type;
+ uint64_t num_processors;
+};
+
+struct xen_processor_performance {
+ uint32_t flags; /* flag for Px sub info type */
+ uint32_t platform_limit; /* Platform limitation on freq usage */
+ struct xen_pct_register control_register;
+ struct xen_pct_register status_register;
+ uint32_t state_count; /* total available performance states */
+ XEN_GUEST_HANDLE(xen_processor_px_t) states;
+ struct xen_psd_package domain_info;
+ uint32_t shared_type; /* coordination type of this processor */
+};
+typedef struct xen_processor_performance xen_processor_performance_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t);
+
+struct xenpf_set_processor_pminfo {
+ /* IN variables */
+ uint32_t id; /* ACPI CPU ID */
+ uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */
+ union {
+ struct xen_processor_power power;/* Cx: _CST/_CSD */
+ struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
+ XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */
+ } u;
+};
+typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t);
+
+#define XENPF_get_cpuinfo 55
+struct xenpf_pcpuinfo {
+ /* IN */
+ uint32_t xen_cpuid;
+ /* OUT */
+ /* The maximum cpu_id that is present */
+ uint32_t max_present;
+#define XEN_PCPU_FLAGS_ONLINE 1
+ /* Corresponding xen_cpuid is not present */
+#define XEN_PCPU_FLAGS_INVALID 2
+ uint32_t flags;
+ uint32_t apic_id;
+ uint32_t acpi_id;
+};
+typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t);
+
+#define XENPF_get_cpu_version 48
+struct xenpf_pcpu_version {
+ /* IN */
+ uint32_t xen_cpuid;
+ /* OUT */
+ /* The maximum cpu_id that is present */
+ uint32_t max_present;
+ char vendor_id[12];
+ uint32_t family;
+ uint32_t model;
+ uint32_t stepping;
+};
+typedef struct xenpf_pcpu_version xenpf_pcpu_version_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t);
+
+#define XENPF_cpu_online 56
+#define XENPF_cpu_offline 57
+struct xenpf_cpu_ol
+{
+ uint32_t cpuid;
+};
+typedef struct xenpf_cpu_ol xenpf_cpu_ol_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_cpu_ol_t);
+
+#define XENPF_cpu_hotadd 58
+struct xenpf_cpu_hotadd
+{
+ uint32_t apic_id;
+ uint32_t acpi_id;
+ uint32_t pxm;
+};
+
+#define XENPF_mem_hotadd 59
+struct xenpf_mem_hotadd
+{
+ uint64_t spfn;
+ uint64_t epfn;
+ uint32_t pxm;
+ uint32_t flags;
+};
+
+#define XENPF_core_parking 60
+
+#define XEN_CORE_PARKING_SET 1
+#define XEN_CORE_PARKING_GET 2
+struct xenpf_core_parking {
+ /* IN variables */
+ uint32_t type;
+ /* IN variables: set cpu nums expected to be idled */
+ /* OUT variables: get cpu nums actually be idled */
+ uint32_t idle_nums;
+};
+typedef struct xenpf_core_parking xenpf_core_parking_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t);
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_platform_op(const struct xen_platform_op*);
+ */
+struct xen_platform_op {
+ uint32_t cmd;
+ uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
+ union {
+ struct xenpf_settime settime;
+ struct xenpf_add_memtype add_memtype;
+ struct xenpf_del_memtype del_memtype;
+ struct xenpf_read_memtype read_memtype;
+ struct xenpf_microcode_update microcode;
+ struct xenpf_platform_quirk platform_quirk;
+ struct xenpf_efi_runtime_call efi_runtime_call;
+ struct xenpf_firmware_info firmware_info;
+ struct xenpf_enter_acpi_sleep enter_acpi_sleep;
+ struct xenpf_change_freq change_freq;
+ struct xenpf_getidletime getidletime;
+ struct xenpf_set_processor_pminfo set_pminfo;
+ struct xenpf_pcpuinfo pcpu_info;
+ struct xenpf_pcpu_version pcpu_version;
+ struct xenpf_cpu_ol cpu_ol;
+ struct xenpf_cpu_hotadd cpu_add;
+ struct xenpf_mem_hotadd mem_add;
+ struct xenpf_core_parking core_parking;
+ uint8_t pad[128];
+ } u;
+};
+typedef struct xen_platform_op xen_platform_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t);
+
+#endif /* __XEN_PUBLIC_PLATFORM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/sched.h
@@ -1,0 +1,174 @@
+/******************************************************************************
+ * sched.h
+ *
+ * Scheduler state interactions
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_SCHED_H__
+#define __XEN_PUBLIC_SCHED_H__
+
+#include "event_channel.h"
+
+/*
+ * `incontents 150 sched Guest Scheduler Operations
+ *
+ * The SCHEDOP interface provides mechanisms for a guest to interact
+ * with the scheduler, including yield, blocking and shutting itself
+ * down.
+ */
+
+/*
+ * The prototype for this hypercall is:
+ * ` long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
+ *
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == Operation-specific extra argument(s), as described below.
+ * ... == Additional Operation-specific extra arguments, described below.
+ *
+ * Versions of Xen prior to 3.0.2 provided only the following legacy version
+ * of this hypercall, supporting only the commands yield, block and shutdown:
+ * long sched_op(int cmd, unsigned long arg)
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == 0 (SCHEDOP_yield and SCHEDOP_block)
+ * == SHUTDOWN_* code (SCHEDOP_shutdown)
+ *
+ * This legacy version is available to new guests as:
+ * ` long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
+ */
+
+/* ` enum sched_op { // SCHEDOP_* => struct sched_* */
+/*
+ * Voluntarily yield the CPU.
+ * @arg == NULL.
+ */
+#define SCHEDOP_yield 0
+
+/*
+ * Block execution of this VCPU until an event is received for processing.
+ * If called with event upcalls masked, this operation will atomically
+ * reenable event delivery and check for pending events before blocking the
+ * VCPU. This avoids a "wakeup waiting" race.
+ * @arg == NULL.
+ */
+#define SCHEDOP_block 1
+
+/*
+ * Halt execution of this domain (all VCPUs) and notify the system controller.
+ * @arg == pointer to sched_shutdown_t structure.
+ *
+ * If the sched_shutdown_t reason is SHUTDOWN_suspend then this
+ * hypercall takes an additional extra argument which should be the
+ * MFN of the guest's start_info_t.
+ *
+ * In addition, when the reason is SHUTDOWN_suspend, this hypercall
+ * returns 1 if suspend was cancelled or the domain was merely
+ * checkpointed, and 0 if it is resuming in a new domain.
+ */
+#define SCHEDOP_shutdown 2
+
+/*
+ * Poll a set of event-channel ports. Return when one or more are pending. An
+ * optional timeout may be specified.
+ * @arg == pointer to sched_poll_t structure.
+ */
+#define SCHEDOP_poll 3
+
+/*
+ * Declare a shutdown for another domain. The main use of this function is
+ * in interpreting shutdown requests and reasons for fully-virtualized
+ * domains. A para-virtualized domain may use SCHEDOP_shutdown directly.
+ * @arg == pointer to sched_remote_shutdown_t structure.
+ */
+#define SCHEDOP_remote_shutdown 4
+
+/*
+ * Latch a shutdown code, so that when the domain later shuts down it
+ * reports this code to the control tools.
+ * @arg == sched_shutdown_t, as for SCHEDOP_shutdown.
+ */
+#define SCHEDOP_shutdown_code 5
+
+/*
+ * Setup, poke and destroy a domain watchdog timer.
+ * @arg == pointer to sched_watchdog_t structure.
+ * With id == 0, setup a domain watchdog timer to cause domain shutdown
+ * after timeout, returns watchdog id.
+ * With id != 0 and timeout == 0, destroy domain watchdog timer.
+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
+ */
+#define SCHEDOP_watchdog 6
+/* ` } */
+
+struct sched_shutdown {
+ unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */
+};
+typedef struct sched_shutdown sched_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);
+
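+/*
+ * Illustrative sketch: a clean reboot request, using the prototype
+ * documented at the top of this header (SHUTDOWN_reboot is defined
+ * below):
+ *
+ *     struct sched_shutdown s;
+ *     s.reason = SHUTDOWN_reboot;
+ *     HYPERVISOR_sched_op(SCHEDOP_shutdown, &s);
+ */
+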
+struct sched_poll {
+ XEN_GUEST_HANDLE(evtchn_port_t) ports;
+ unsigned int nr_ports;
+ uint64_t timeout;
+};
+typedef struct sched_poll sched_poll_t;
+DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
+
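+/*
+ * Illustrative sketch: blocking on a single already-bound
+ * evtchn_port_t port, again assuming a set_xen_guest_handle() helper;
+ * timeout semantics are an assumption here and are worth checking
+ * against the hypervisor sources (0 is the "no timeout" value):
+ *
+ *     struct sched_poll poll;
+ *     set_xen_guest_handle(poll.ports, &port);
+ *     poll.nr_ports = 1;
+ *     poll.timeout = 0;
+ *     HYPERVISOR_sched_op(SCHEDOP_poll, &poll);
+ */
+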
+struct sched_remote_shutdown {
+ domid_t domain_id; /* Remote domain ID */
+ unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */
+};
+typedef struct sched_remote_shutdown sched_remote_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
+
+struct sched_watchdog {
+ uint32_t id; /* watchdog ID */
+ uint32_t timeout; /* timeout */
+};
+typedef struct sched_watchdog sched_watchdog_t;
+DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t);
+
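+/*
+ * Illustrative sketch of the id/timeout protocol described above
+ * (the timeout unit is an assumption; check the hypervisor sources):
+ *
+ *     struct sched_watchdog wd;
+ *     wd.id = 0; wd.timeout = 30;
+ *     id = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);   returns new id
+ *     wd.id = id;
+ *     HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);        poke/re-arm
+ *     wd.id = id; wd.timeout = 0;
+ *     HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);        destroy
+ */
+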
+/*
+ * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
+ * software to determine the appropriate action. For the most part, Xen does
+ * not care about the shutdown code.
+ */
+/* ` enum sched_shutdown_reason { */
+#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */
+#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */
+#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */
+#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */
+#define SHUTDOWN_watchdog 4 /* Restart because the watchdog timer expired. */
+#define SHUTDOWN_MAX 4 /* Maximum valid shutdown reason. */
+/* ` } */
+
+#endif /* __XEN_PUBLIC_SCHED_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/sysctl.h
@@ -1,0 +1,694 @@
+/******************************************************************************
+ * sysctl.h
+ *
+ * System management operations. For use by node control stack.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_SYSCTL_H__
+#define __XEN_PUBLIC_SYSCTL_H__
+
+#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
+#error "sysctl operations are intended for use by node control tools only"
+#endif
+
+#include "xen.h"
+#include "domctl.h"
+
+#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000A
+
+/*
+ * Read console content from Xen buffer ring.
+ */
+/* XEN_SYSCTL_readconsole */
+struct xen_sysctl_readconsole {
+ /* IN: Non-zero -> clear after reading. */
+ uint8_t clear;
+ /* IN: Non-zero -> start index specified by @index field. */
+ uint8_t incremental;
+ uint8_t pad0, pad1;
+ /*
+ * IN: Start index for consuming from ring buffer (if @incremental);
+ * OUT: End index after consuming from ring buffer.
+ */
+ uint32_t index;
+ /* IN: Virtual address to write console data. */
+ XEN_GUEST_HANDLE_64(char) buffer;
+ /* IN: Size of buffer; OUT: Bytes written to buffer. */
+ uint32_t count;
+};
+typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t);
+
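+/*
+ * Illustrative sketch (assumes a HYPERVISOR_sysctl(xen_sysctl_t*)
+ * wrapper for the control tools and the usual set_xen_guest_handle()
+ * helper; see struct xen_sysctl at the end of this header):
+ *
+ *     char buf[4096];
+ *     struct xen_sysctl op;
+ *     memset(&op, 0, sizeof op);
+ *     op.cmd = XEN_SYSCTL_readconsole;
+ *     op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
+ *     set_xen_guest_handle(op.u.readconsole.buffer, buf);
+ *     op.u.readconsole.count = sizeof buf;
+ *     HYPERVISOR_sysctl(&op);
+ *
+ * On return op.u.readconsole.count holds the bytes actually written.
+ */
+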
+/* Get trace buffers machine base address */
+/* XEN_SYSCTL_tbuf_op */
+struct xen_sysctl_tbuf_op {
+ /* IN variables */
+#define XEN_SYSCTL_TBUFOP_get_info 0
+#define XEN_SYSCTL_TBUFOP_set_cpu_mask 1
+#define XEN_SYSCTL_TBUFOP_set_evt_mask 2
+#define XEN_SYSCTL_TBUFOP_set_size 3
+#define XEN_SYSCTL_TBUFOP_enable 4
+#define XEN_SYSCTL_TBUFOP_disable 5
+ uint32_t cmd;
+ /* IN/OUT variables */
+ struct xenctl_bitmap cpu_mask;
+ uint32_t evt_mask;
+ /* OUT variables */
+ uint64_aligned_t buffer_mfn;
+ uint32_t size; /* Also an IN variable! */
+};
+typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t);
+
+/*
+ * Get physical information about the host machine
+ */
+/* XEN_SYSCTL_physinfo */
+ /* (x86) The platform supports HVM guests. */
+#define _XEN_SYSCTL_PHYSCAP_hvm 0
+#define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm)
+ /* (x86) The platform supports HVM-guest direct access to I/O devices. */
+#define _XEN_SYSCTL_PHYSCAP_hvm_directio 1
+#define XEN_SYSCTL_PHYSCAP_hvm_directio (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio)
+struct xen_sysctl_physinfo {
+ uint32_t threads_per_core;
+ uint32_t cores_per_socket;
+ uint32_t nr_cpus; /* # CPUs currently online */
+ uint32_t max_cpu_id; /* Largest possible CPU ID on this host */
+ uint32_t nr_nodes; /* # nodes currently online */
+ uint32_t max_node_id; /* Largest possible node ID on this host */
+ uint32_t cpu_khz;
+ uint64_aligned_t total_pages;
+ uint64_aligned_t free_pages;
+ uint64_aligned_t scrub_pages;
+ uint64_aligned_t outstanding_pages;
+ uint32_t hw_cap[8];
+
+ /* XEN_SYSCTL_PHYSCAP_??? */
+ uint32_t capabilities;
+};
+typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t);
+
+/*
+ * Get the ID of the current scheduler.
+ */
+/* XEN_SYSCTL_sched_id */
+struct xen_sysctl_sched_id {
+ /* OUT variable */
+ uint32_t sched_id;
+};
+typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t);
+
+/* Interface for controlling Xen software performance counters. */
+/* XEN_SYSCTL_perfc_op */
+/* Sub-operations: */
+#define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */
+#define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. */
+struct xen_sysctl_perfc_desc {
+ char name[80]; /* name of perf counter */
+ uint32_t nr_vals; /* number of values for this counter */
+};
+typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t);
+typedef uint32_t xen_sysctl_perfc_val_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t);
+
+struct xen_sysctl_perfc_op {
+ /* IN variables. */
+ uint32_t cmd; /* XEN_SYSCTL_PERFCOP_??? */
+ /* OUT variables. */
+ uint32_t nr_counters; /* number of counter descriptions */
+ uint32_t nr_vals; /* number of values */
+ /* counter information (or NULL) */
+ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc;
+ /* counter values (or NULL) */
+ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val;
+};
+typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t);
+
+/* XEN_SYSCTL_getdomaininfolist */
+struct xen_sysctl_getdomaininfolist {
+ /* IN variables. */
+ domid_t first_domain;
+ uint32_t max_domains;
+ XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer;
+ /* OUT variables. */
+ uint32_t num_domains;
+};
+typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
+
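+/*
+ * Illustrative sketch, under the same HYPERVISOR_sysctl() assumption
+ * as above: enumerate up to 64 domains starting from domain 0.
+ *
+ *     xen_domctl_getdomaininfo_t info[64];
+ *     struct xen_sysctl op;
+ *     op.cmd = XEN_SYSCTL_getdomaininfolist;
+ *     op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
+ *     op.u.getdomaininfolist.first_domain = 0;
+ *     op.u.getdomaininfolist.max_domains = 64;
+ *     set_xen_guest_handle(op.u.getdomaininfolist.buffer, info);
+ *     if (HYPERVISOR_sysctl(&op) == 0)
+ *         ndom = op.u.getdomaininfolist.num_domains;
+ */
+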
+/* Inject debug keys into Xen. */
+/* XEN_SYSCTL_debug_keys */
+struct xen_sysctl_debug_keys {
+ /* IN variables. */
+ XEN_GUEST_HANDLE_64(char) keys;
+ uint32_t nr_keys;
+};
+typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t);
+
+/* Get physical CPU information. */
+/* XEN_SYSCTL_getcpuinfo */
+struct xen_sysctl_cpuinfo {
+ uint64_aligned_t idletime;
+};
+typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t);
+struct xen_sysctl_getcpuinfo {
+ /* IN variables. */
+ uint32_t max_cpus;
+ XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info;
+ /* OUT variables. */
+ uint32_t nr_cpus;
+};
+typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t);
+
+/* XEN_SYSCTL_availheap */
+struct xen_sysctl_availheap {
+ /* IN variables. */
+ uint32_t min_bitwidth; /* Smallest address width (zero if don't care). */
+ uint32_t max_bitwidth; /* Largest address width (zero if don't care). */
+ int32_t node; /* NUMA node of interest (-1 for all nodes). */
+ /* OUT variables. */
+ uint64_aligned_t avail_bytes;/* Bytes available in the specified region. */
+};
+typedef struct xen_sysctl_availheap xen_sysctl_availheap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t);
+
+/* XEN_SYSCTL_get_pmstat */
+struct pm_px_val {
+ uint64_aligned_t freq; /* Px core frequency */
+ uint64_aligned_t residency; /* Px residency time */
+ uint64_aligned_t count; /* Px transition count */
+};
+typedef struct pm_px_val pm_px_val_t;
+DEFINE_XEN_GUEST_HANDLE(pm_px_val_t);
+
+struct pm_px_stat {
+ uint8_t total; /* total Px states */
+ uint8_t usable; /* usable Px states */
+ uint8_t last; /* last Px state */
+ uint8_t cur; /* current Px state */
+ XEN_GUEST_HANDLE_64(uint64) trans_pt; /* Px transition table */
+ XEN_GUEST_HANDLE_64(pm_px_val_t) pt;
+};
+typedef struct pm_px_stat pm_px_stat_t;
+DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t);
+
+struct pm_cx_stat {
+ uint32_t nr; /* number of entries in triggers & residencies, including C0 */
+ uint32_t last; /* last Cx state */
+ uint64_aligned_t idle_time; /* idle time from boot */
+ XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */
+ XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */
+ uint64_aligned_t pc2;
+ uint64_aligned_t pc3;
+ uint64_aligned_t pc6;
+ uint64_aligned_t pc7;
+ uint64_aligned_t cc3;
+ uint64_aligned_t cc6;
+ uint64_aligned_t cc7;
+};
+
+struct xen_sysctl_get_pmstat {
+#define PMSTAT_CATEGORY_MASK 0xf0
+#define PMSTAT_PX 0x10
+#define PMSTAT_CX 0x20
+#define PMSTAT_get_max_px (PMSTAT_PX | 0x1)
+#define PMSTAT_get_pxstat (PMSTAT_PX | 0x2)
+#define PMSTAT_reset_pxstat (PMSTAT_PX | 0x3)
+#define PMSTAT_get_max_cx (PMSTAT_CX | 0x1)
+#define PMSTAT_get_cxstat (PMSTAT_CX | 0x2)
+#define PMSTAT_reset_cxstat (PMSTAT_CX | 0x3)
+ uint32_t type;
+ uint32_t cpuid;
+ union {
+ struct pm_px_stat getpx;
+ struct pm_cx_stat getcx;
+ /* other struct for tx, etc */
+ } u;
+};
+typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t);
+
+/* XEN_SYSCTL_cpu_hotplug */
+struct xen_sysctl_cpu_hotplug {
+ /* IN variables */
+ uint32_t cpu; /* Physical cpu. */
+#define XEN_SYSCTL_CPU_HOTPLUG_ONLINE 0
+#define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1
+ uint32_t op; /* hotplug opcode */
+};
+typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t);
+
+/*
+ * Get/set Xen power management parameters, including:
+ * 1. cpufreq governors and related parameters
+ */
+/* XEN_SYSCTL_pm_op */
+struct xen_userspace {
+ uint32_t scaling_setspeed;
+};
+typedef struct xen_userspace xen_userspace_t;
+
+struct xen_ondemand {
+ uint32_t sampling_rate_max;
+ uint32_t sampling_rate_min;
+
+ uint32_t sampling_rate;
+ uint32_t up_threshold;
+};
+typedef struct xen_ondemand xen_ondemand_t;
+
+/*
+ * The cpufreq parameter names in this structure match the
+ * corresponding sysfs file names used by native Linux.
+ */
+#define CPUFREQ_NAME_LEN 16
+struct xen_get_cpufreq_para {
+ /* IN/OUT variable */
+ uint32_t cpu_num;
+ uint32_t freq_num;
+ uint32_t gov_num;
+
+ /* for all governors */
+ /* OUT variable */
+ XEN_GUEST_HANDLE_64(uint32) affected_cpus;
+ XEN_GUEST_HANDLE_64(uint32) scaling_available_frequencies;
+ XEN_GUEST_HANDLE_64(char) scaling_available_governors;
+ char scaling_driver[CPUFREQ_NAME_LEN];
+
+ uint32_t cpuinfo_cur_freq;
+ uint32_t cpuinfo_max_freq;
+ uint32_t cpuinfo_min_freq;
+ uint32_t scaling_cur_freq;
+
+ char scaling_governor[CPUFREQ_NAME_LEN];
+ uint32_t scaling_max_freq;
+ uint32_t scaling_min_freq;
+
+ /* for specific governor */
+ union {
+ struct xen_userspace userspace;
+ struct xen_ondemand ondemand;
+ } u;
+
+ int32_t turbo_enabled;
+};
+
+struct xen_set_cpufreq_gov {
+ char scaling_governor[CPUFREQ_NAME_LEN];
+};
+
+struct xen_set_cpufreq_para {
+ #define SCALING_MAX_FREQ 1
+ #define SCALING_MIN_FREQ 2
+ #define SCALING_SETSPEED 3
+ #define SAMPLING_RATE 4
+ #define UP_THRESHOLD 5
+
+ uint32_t ctrl_type;
+ uint32_t ctrl_value;
+};
+
+struct xen_sysctl_pm_op {
+ #define PM_PARA_CATEGORY_MASK 0xf0
+ #define CPUFREQ_PARA 0x10
+
+ /* cpufreq command type */
+ #define GET_CPUFREQ_PARA (CPUFREQ_PARA | 0x01)
+ #define SET_CPUFREQ_GOV (CPUFREQ_PARA | 0x02)
+ #define SET_CPUFREQ_PARA (CPUFREQ_PARA | 0x03)
+ #define GET_CPUFREQ_AVGFREQ (CPUFREQ_PARA | 0x04)
+
+ /* set/reset scheduler power saving option */
+ #define XEN_SYSCTL_pm_op_set_sched_opt_smt 0x21
+
+ /* cpuidle max_cstate access command */
+ #define XEN_SYSCTL_pm_op_get_max_cstate 0x22
+ #define XEN_SYSCTL_pm_op_set_max_cstate 0x23
+
+ /* set scheduler migration cost value */
+ #define XEN_SYSCTL_pm_op_set_vcpu_migration_delay 0x24
+ #define XEN_SYSCTL_pm_op_get_vcpu_migration_delay 0x25
+
+ /* enable/disable turbo mode when in dbs governor */
+ #define XEN_SYSCTL_pm_op_enable_turbo 0x26
+ #define XEN_SYSCTL_pm_op_disable_turbo 0x27
+
+ uint32_t cmd;
+ uint32_t cpuid;
+ union {
+ struct xen_get_cpufreq_para get_para;
+ struct xen_set_cpufreq_gov set_gov;
+ struct xen_set_cpufreq_para set_para;
+ uint64_aligned_t get_avgfreq;
+ uint32_t set_sched_opt_smt;
+ uint32_t get_max_cstate;
+ uint32_t set_max_cstate;
+ uint32_t get_vcpu_migration_delay;
+ uint32_t set_vcpu_migration_delay;
+ } u;
+};
+
+/* XEN_SYSCTL_page_offline_op */
+struct xen_sysctl_page_offline_op {
+ /* IN: range of page to be offlined */
+#define sysctl_page_offline 1
+#define sysctl_page_online 2
+#define sysctl_query_page_offline 3
+ uint32_t cmd;
+ uint32_t start;
+ uint32_t end;
+ /* OUT: result of page offline request */
+ /*
+ * bits 0-15: result flags
+ * bits 16-31: owner
+ */
+ XEN_GUEST_HANDLE(uint32) status;
+};
+
+#define PG_OFFLINE_STATUS_MASK (0xFFUL)
+
+/* The result is invalid, i.e. HV does not handle it */
+#define PG_OFFLINE_INVALID (0x1UL << 0)
+
+#define PG_OFFLINE_OFFLINED (0x1UL << 1)
+#define PG_OFFLINE_PENDING (0x1UL << 2)
+#define PG_OFFLINE_FAILED (0x1UL << 3)
+#define PG_OFFLINE_AGAIN (0x1UL << 4)
+
+#define PG_ONLINE_FAILED PG_OFFLINE_FAILED
+#define PG_ONLINE_ONLINED PG_OFFLINE_OFFLINED
+
+#define PG_OFFLINE_STATUS_OFFLINED (0x1UL << 1)
+#define PG_OFFLINE_STATUS_ONLINE (0x1UL << 2)
+#define PG_OFFLINE_STATUS_OFFLINE_PENDING (0x1UL << 3)
+#define PG_OFFLINE_STATUS_BROKEN (0x1UL << 4)
+
+#define PG_OFFLINE_MISC_MASK (0xFFUL << 4)
+
+/* valid when PG_OFFLINE_FAILED or PG_OFFLINE_PENDING */
+#define PG_OFFLINE_XENPAGE (0x1UL << 8)
+#define PG_OFFLINE_DOM0PAGE (0x1UL << 9)
+#define PG_OFFLINE_ANONYMOUS (0x1UL << 10)
+#define PG_OFFLINE_NOT_CONV_RAM (0x1UL << 11)
+#define PG_OFFLINE_OWNED (0x1UL << 12)
+
+#define PG_OFFLINE_BROKEN (0x1UL << 13)
+#define PG_ONLINE_BROKEN PG_OFFLINE_BROKEN
+
+#define PG_OFFLINE_OWNER_SHIFT 16
+
+/* XEN_SYSCTL_lockprof_op */
+/* Sub-operations: */
+#define XEN_SYSCTL_LOCKPROF_reset 1 /* Reset all profile data to zero. */
+#define XEN_SYSCTL_LOCKPROF_query 2 /* Get lock profile information. */
+/* Record-type: */
+#define LOCKPROF_TYPE_GLOBAL 0 /* global lock, idx meaningless */
+#define LOCKPROF_TYPE_PERDOM 1 /* per-domain lock, idx is domid */
+#define LOCKPROF_TYPE_N 2 /* number of types */
+struct xen_sysctl_lockprof_data {
+ char name[40]; /* lock name (may include up to 2 %d specifiers) */
+ int32_t type; /* LOCKPROF_TYPE_??? */
+ int32_t idx; /* index (e.g. domain id) */
+ uint64_aligned_t lock_cnt; /* # of successful lock acquisitions */
+ uint64_aligned_t block_cnt; /* # of waits for the lock */
+ uint64_aligned_t lock_time; /* nsecs lock held */
+ uint64_aligned_t block_time; /* nsecs waited for lock */
+};
+typedef struct xen_sysctl_lockprof_data xen_sysctl_lockprof_data_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_data_t);
+struct xen_sysctl_lockprof_op {
+ /* IN variables. */
+ uint32_t cmd; /* XEN_SYSCTL_LOCKPROF_??? */
+ uint32_t max_elem; /* size of output buffer */
+ /* OUT variables (query only). */
+ uint32_t nr_elem; /* number of elements available */
+ uint64_aligned_t time; /* nsecs of profile measurement */
+ /* profile information (or NULL) */
+ XEN_GUEST_HANDLE_64(xen_sysctl_lockprof_data_t) data;
+};
+typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t);
+
+/* XEN_SYSCTL_topologyinfo */
+#define INVALID_TOPOLOGY_ID (~0U)
+struct xen_sysctl_topologyinfo {
+ /*
+ * IN: maximum addressable entry in the caller-provided arrays.
+ * OUT: largest cpu identifier in the system.
+ * If OUT is greater than IN then the arrays are truncated!
+ * If OUT is less than IN then the array tails are not written by sysctl.
+ */
+ uint32_t max_cpu_index;
+
+ /*
+ * If not NULL, these arrays are filled with core/socket/node identifier
+ * for each cpu.
+ * If a cpu has no core/socket/node information (e.g., cpu not present)
+ * then the sentinel value ~0u is written to each array.
+ * The number of array elements written by the sysctl is:
+ * min(@max_cpu_index_IN,@max_cpu_index_OUT)+1
+ */
+ XEN_GUEST_HANDLE_64(uint32) cpu_to_core;
+ XEN_GUEST_HANDLE_64(uint32) cpu_to_socket;
+ XEN_GUEST_HANDLE_64(uint32) cpu_to_node;
+};
+typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t);
+
+/* XEN_SYSCTL_numainfo */
+#define INVALID_NUMAINFO_ID (~0U)
+struct xen_sysctl_numainfo {
+ /*
+ * IN: maximum addressable entry in the caller-provided arrays.
+ * OUT: largest node identifier in the system.
+ * If OUT is greater than IN then the arrays are truncated!
+ */
+ uint32_t max_node_index;
+
+ /* NB. Entries are 0 if node is not present. */
+ XEN_GUEST_HANDLE_64(uint64) node_to_memsize;
+ XEN_GUEST_HANDLE_64(uint64) node_to_memfree;
+
+ /*
+ * Array, of size (max_node_index+1)^2, listing memory access distances
+ * between nodes. If an entry has no node distance information (e.g., node
+ * not present) then the value ~0u is written.
+ *
+ * Note that the array rows must be indexed by multiplying by the minimum
+ * of the caller-provided max_node_index and the returned value of
+ * max_node_index. That is, if the largest node index in the system is
+ * smaller than the caller can handle, a smaller 2-d array is constructed
+ * within the space provided by the caller. When this occurs, trailing
+ * space provided by the caller is not modified. If the largest node index
+ * in the system is larger than the caller can handle, then a 2-d array of
+ * the maximum size handleable by the caller is constructed.
+ */
+ XEN_GUEST_HANDLE_64(uint32) node_to_node_distance;
+};
+typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t);
+
+/* XEN_SYSCTL_cpupool_op */
+#define XEN_SYSCTL_CPUPOOL_OP_CREATE 1 /* C */
+#define XEN_SYSCTL_CPUPOOL_OP_DESTROY 2 /* D */
+#define XEN_SYSCTL_CPUPOOL_OP_INFO 3 /* I */
+#define XEN_SYSCTL_CPUPOOL_OP_ADDCPU 4 /* A */
+#define XEN_SYSCTL_CPUPOOL_OP_RMCPU 5 /* R */
+#define XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN 6 /* M */
+#define XEN_SYSCTL_CPUPOOL_OP_FREEINFO 7 /* F */
+#define XEN_SYSCTL_CPUPOOL_PAR_ANY 0xFFFFFFFF
+struct xen_sysctl_cpupool_op {
+ uint32_t op; /* IN */
+ uint32_t cpupool_id; /* IN: CDIARM OUT: CI */
+ uint32_t sched_id; /* IN: C OUT: I */
+ uint32_t domid; /* IN: M */
+ uint32_t cpu; /* IN: AR */
+ uint32_t n_dom; /* OUT: I */
+ struct xenctl_bitmap cpumap; /* OUT: IF */
+};
+typedef struct xen_sysctl_cpupool_op xen_sysctl_cpupool_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpupool_op_t);
+
+#define ARINC653_MAX_DOMAINS_PER_SCHEDULE 64
+/*
+ * This structure is used to pass a new ARINC653 schedule from a
+ * privileged domain (i.e. dom0) to Xen.
+ */
+struct xen_sysctl_arinc653_schedule {
+ /* major_frame holds the time for the new schedule's major frame
+ * in nanoseconds. */
+ uint64_aligned_t major_frame;
+ /* num_sched_entries holds how many of the entries in the
+ * sched_entries[] array are valid. */
+ uint8_t num_sched_entries;
+ /* The sched_entries array holds the actual schedule entries. */
+ struct {
+ /* dom_handle must match a domain's UUID */
+ xen_domain_handle_t dom_handle;
+ /* If a domain has multiple VCPUs, vcpu_id specifies which one
+ * this schedule entry applies to. It should be set to 0 if
+ * there is only one VCPU for the domain. */
+ unsigned int vcpu_id;
+ /* runtime specifies the amount of time that should be allocated
+ * to this VCPU per major frame. It is specified in nanoseconds */
+ uint64_aligned_t runtime;
+ } sched_entries[ARINC653_MAX_DOMAINS_PER_SCHEDULE];
+};
+typedef struct xen_sysctl_arinc653_schedule xen_sysctl_arinc653_schedule_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_arinc653_schedule_t);
+
+struct xen_sysctl_credit_schedule {
+ /* Length of timeslice in milliseconds */
+#define XEN_SYSCTL_CSCHED_TSLICE_MAX 1000
+#define XEN_SYSCTL_CSCHED_TSLICE_MIN 1
+ unsigned tslice_ms;
+ /* Rate limit (minimum timeslice) in microseconds */
+#define XEN_SYSCTL_SCHED_RATELIMIT_MAX 500000
+#define XEN_SYSCTL_SCHED_RATELIMIT_MIN 100
+ unsigned ratelimit_us;
+};
+typedef struct xen_sysctl_credit_schedule xen_sysctl_credit_schedule_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_credit_schedule_t);
+
+/* XEN_SYSCTL_scheduler_op */
+/* Set or get info? */
+#define XEN_SYSCTL_SCHEDOP_putinfo 0
+#define XEN_SYSCTL_SCHEDOP_getinfo 1
+struct xen_sysctl_scheduler_op {
+ uint32_t cpupool_id; /* Cpupool whose scheduler is to be targeted. */
+ uint32_t sched_id; /* XEN_SCHEDULER_* (domctl.h) */
+ uint32_t cmd; /* XEN_SYSCTL_SCHEDOP_* */
+ union {
+ struct xen_sysctl_sched_arinc653 {
+ XEN_GUEST_HANDLE_64(xen_sysctl_arinc653_schedule_t) schedule;
+ } sched_arinc653;
+ struct xen_sysctl_credit_schedule sched_credit;
+ } u;
+};
+typedef struct xen_sysctl_scheduler_op xen_sysctl_scheduler_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_scheduler_op_t);
+
+/* XEN_SYSCTL_coverage_op */
+/*
+ * Get total size of information, to help allocate
+ * the buffer. The pointer points to a 32 bit value.
+ */
+#define XEN_SYSCTL_COVERAGE_get_total_size 0
+
+/*
+ * Read coverage information in a single run
+ * You must use a tool to split them.
+ */
+#define XEN_SYSCTL_COVERAGE_read 1
+
+/*
+ * Reset all the coverage counters to 0
+ * No parameters.
+ */
+#define XEN_SYSCTL_COVERAGE_reset 2
+
+/*
+ * Like XEN_SYSCTL_COVERAGE_read, but also resets the
+ * counters to 0 in a single call.
+ */
+#define XEN_SYSCTL_COVERAGE_read_and_reset 3
+
+struct xen_sysctl_coverage_op {
+ uint32_t cmd; /* XEN_SYSCTL_COVERAGE_* */
+ union {
+ uint32_t total_size; /* OUT */
+ XEN_GUEST_HANDLE_64(uint8) raw_info; /* OUT */
+ } u;
+};
+typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t);
+
+struct xen_sysctl {
+ uint32_t cmd;
+#define XEN_SYSCTL_readconsole 1
+#define XEN_SYSCTL_tbuf_op 2
+#define XEN_SYSCTL_physinfo 3
+#define XEN_SYSCTL_sched_id 4
+#define XEN_SYSCTL_perfc_op 5
+#define XEN_SYSCTL_getdomaininfolist 6
+#define XEN_SYSCTL_debug_keys 7
+#define XEN_SYSCTL_getcpuinfo 8
+#define XEN_SYSCTL_availheap 9
+#define XEN_SYSCTL_get_pmstat 10
+#define XEN_SYSCTL_cpu_hotplug 11
+#define XEN_SYSCTL_pm_op 12
+#define XEN_SYSCTL_page_offline_op 14
+#define XEN_SYSCTL_lockprof_op 15
+#define XEN_SYSCTL_topologyinfo 16
+#define XEN_SYSCTL_numainfo 17
+#define XEN_SYSCTL_cpupool_op 18
+#define XEN_SYSCTL_scheduler_op 19
+#define XEN_SYSCTL_coverage_op 20
+ uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
+ union {
+ struct xen_sysctl_readconsole readconsole;
+ struct xen_sysctl_tbuf_op tbuf_op;
+ struct xen_sysctl_physinfo physinfo;
+ struct xen_sysctl_topologyinfo topologyinfo;
+ struct xen_sysctl_numainfo numainfo;
+ struct xen_sysctl_sched_id sched_id;
+ struct xen_sysctl_perfc_op perfc_op;
+ struct xen_sysctl_getdomaininfolist getdomaininfolist;
+ struct xen_sysctl_debug_keys debug_keys;
+ struct xen_sysctl_getcpuinfo getcpuinfo;
+ struct xen_sysctl_availheap availheap;
+ struct xen_sysctl_get_pmstat get_pmstat;
+ struct xen_sysctl_cpu_hotplug cpu_hotplug;
+ struct xen_sysctl_pm_op pm_op;
+ struct xen_sysctl_page_offline_op page_offline;
+ struct xen_sysctl_lockprof_op lockprof_op;
+ struct xen_sysctl_cpupool_op cpupool_op;
+ struct xen_sysctl_scheduler_op scheduler_op;
+ struct xen_sysctl_coverage_op coverage_op;
+ uint8_t pad[128];
+ } u;
+};
+typedef struct xen_sysctl xen_sysctl_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t);
+
+#endif /* __XEN_PUBLIC_SYSCTL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/tmem.h
@@ -1,0 +1,148 @@
+/******************************************************************************
+ * tmem.h
+ *
+ * Guest OS interface to Xen Transcendent Memory.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_TMEM_H__
+#define __XEN_PUBLIC_TMEM_H__
+
+#include "xen.h"
+
+/* version of ABI */
+#define TMEM_SPEC_VERSION 1
+
+/* Commands to HYPERVISOR_tmem_op() */
+#define TMEM_CONTROL 0
+#define TMEM_NEW_POOL 1
+#define TMEM_DESTROY_POOL 2
+#define TMEM_NEW_PAGE 3
+#define TMEM_PUT_PAGE 4
+#define TMEM_GET_PAGE 5
+#define TMEM_FLUSH_PAGE 6
+#define TMEM_FLUSH_OBJECT 7
+#define TMEM_READ 8
+#define TMEM_WRITE 9
+#define TMEM_XCHG 10
+
+/* Privileged commands to HYPERVISOR_tmem_op() */
+#define TMEM_AUTH 101
+#define TMEM_RESTORE_NEW 102
+
+/* Subops for HYPERVISOR_tmem_op(TMEM_CONTROL) */
+#define TMEMC_THAW 0
+#define TMEMC_FREEZE 1
+#define TMEMC_FLUSH 2
+#define TMEMC_DESTROY 3
+#define TMEMC_LIST 4
+#define TMEMC_SET_WEIGHT 5
+#define TMEMC_SET_CAP 6
+#define TMEMC_SET_COMPRESS 7
+#define TMEMC_QUERY_FREEABLE_MB 8
+#define TMEMC_SAVE_BEGIN 10
+#define TMEMC_SAVE_GET_VERSION 11
+#define TMEMC_SAVE_GET_MAXPOOLS 12
+#define TMEMC_SAVE_GET_CLIENT_WEIGHT 13
+#define TMEMC_SAVE_GET_CLIENT_CAP 14
+#define TMEMC_SAVE_GET_CLIENT_FLAGS 15
+#define TMEMC_SAVE_GET_POOL_FLAGS 16
+#define TMEMC_SAVE_GET_POOL_NPAGES 17
+#define TMEMC_SAVE_GET_POOL_UUID 18
+#define TMEMC_SAVE_GET_NEXT_PAGE 19
+#define TMEMC_SAVE_GET_NEXT_INV 20
+#define TMEMC_SAVE_END 21
+#define TMEMC_RESTORE_BEGIN 30
+#define TMEMC_RESTORE_PUT_PAGE 32
+#define TMEMC_RESTORE_FLUSH_PAGE 33
+
+/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */
+#define TMEM_POOL_PERSIST 1
+#define TMEM_POOL_SHARED 2
+#define TMEM_POOL_PRECOMPRESSED 4
+#define TMEM_POOL_PAGESIZE_SHIFT 4
+#define TMEM_POOL_PAGESIZE_MASK 0xf
+#define TMEM_POOL_VERSION_SHIFT 24
+#define TMEM_POOL_VERSION_MASK 0xff
+#define TMEM_POOL_RESERVED_BITS 0x00ffff00
+
+/* Bits for client flags (save/restore) */
+#define TMEM_CLIENT_COMPRESS 1
+#define TMEM_CLIENT_FROZEN 2
+
+/* Special errno values */
+#define EFROZEN 1000
+#define EEMPTY 1001
+
+
+#ifndef __ASSEMBLY__
+typedef xen_pfn_t tmem_cli_mfn_t;
+typedef XEN_GUEST_HANDLE(char) tmem_cli_va_t;
+struct tmem_op {
+ uint32_t cmd;
+ int32_t pool_id;
+ union {
+ struct {
+ uint64_t uuid[2];
+ uint32_t flags;
+ uint32_t arg1;
+ } creat; /* for cmd == TMEM_NEW_POOL, TMEM_AUTH, TMEM_RESTORE_NEW */
+ struct {
+ uint32_t subop;
+ uint32_t cli_id;
+ uint32_t arg1;
+ uint32_t arg2;
+ uint64_t oid[3];
+ tmem_cli_va_t buf;
+ } ctrl; /* for cmd == TMEM_CONTROL */
+ struct {
+ uint64_t oid[3];
+ uint32_t index;
+ uint32_t tmem_offset;
+ uint32_t pfn_offset;
+ uint32_t len;
+ tmem_cli_mfn_t cmfn; /* client machine page frame */
+ } gen; /* for all other cmd ("generic") */
+ } u;
+};
+typedef struct tmem_op tmem_op_t;
+DEFINE_XEN_GUEST_HANDLE(tmem_op_t);
+
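+/*
+ * Illustrative sketch: creating a private, persistent pool (the uuid
+ * values are placeholders; the hypercall returns the new pool id):
+ *
+ *     struct tmem_op op;
+ *     op.cmd = TMEM_NEW_POOL;
+ *     op.u.creat.uuid[0] = 0;
+ *     op.u.creat.uuid[1] = 0;
+ *     op.u.creat.flags = TMEM_POOL_PERSIST;
+ *     pool_id = HYPERVISOR_tmem_op(&op);
+ */
+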
+struct tmem_handle {
+ uint32_t pool_id;
+ uint32_t index;
+ uint64_t oid[3];
+};
+#endif
+
+#endif /* __XEN_PUBLIC_TMEM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/trace.h
@@ -1,0 +1,310 @@
+/******************************************************************************
+ * include/public/trace.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Mark Williamson, (C) 2004 Intel Research Cambridge
+ * Copyright (C) 2005 Bin Ren
+ */
+
+#ifndef __XEN_PUBLIC_TRACE_H__
+#define __XEN_PUBLIC_TRACE_H__
+
+#define TRACE_EXTRA_MAX 7
+#define TRACE_EXTRA_SHIFT 28
+
+/* Trace classes */
+#define TRC_CLS_SHIFT 16
+#define TRC_GEN 0x0001f000 /* General trace */
+#define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */
+#define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */
+#define TRC_HVM 0x0008f000 /* Xen HVM trace */
+#define TRC_MEM 0x0010f000 /* Xen memory trace */
+#define TRC_PV 0x0020f000 /* Xen PV traces */
+#define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */
+#define TRC_HW 0x0080f000 /* Xen hardware-related traces */
+#define TRC_GUEST 0x0800f000 /* Guest-generated traces */
+#define TRC_ALL 0x0ffff000
+#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
+#define TRC_HD_CYCLE_FLAG (1UL<<31)
+#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) )
+#define TRC_HD_EXTRA(x) (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX)
+
+/* Trace subclasses */
+#define TRC_SUBCLS_SHIFT 12
+
+/* trace subclasses for SVM */
+#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */
+#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */
+
+#define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */
+#define TRC_SCHED_CLASS 0x00022000 /* Scheduler-specific */
+#define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */
+
+/*
+ * The highest 3 bits of the last 12 bits of TRC_SCHED_CLASS above are
+ * reserved for encoding what scheduler produced the information. The
+ * actual event is encoded in the last 9 bits.
+ *
+ * This means we have 8 scheduling IDs available (which means at most 8
+ * schedulers generating events) and, in each scheduler, up to 512
+ * different events.
+ */
+#define TRC_SCHED_ID_BITS 3
+#define TRC_SCHED_ID_SHIFT (TRC_SUBCLS_SHIFT - TRC_SCHED_ID_BITS)
+#define TRC_SCHED_ID_MASK (((1UL<<TRC_SCHED_ID_BITS) - 1) << TRC_SCHED_ID_SHIFT)
+#define TRC_SCHED_EVT_MASK (~(TRC_SCHED_ID_MASK))
+
+/* Per-scheduler IDs, to identify scheduler specific events */
+#define TRC_SCHED_CSCHED 0
+#define TRC_SCHED_CSCHED2 1
+#define TRC_SCHED_SEDF 2
+#define TRC_SCHED_ARINC653 3
+
+/* Per-scheduler tracing */
+#define TRC_SCHED_CLASS_EVT(_c, _e) \
+ ( ( TRC_SCHED_CLASS | \
+ ((TRC_SCHED_##_c << TRC_SCHED_ID_SHIFT) & TRC_SCHED_ID_MASK) ) + \
+ (_e & TRC_SCHED_EVT_MASK) )
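+
+/*
+ * Illustrative sketch (editor's addition, not part of the Xen header):
+ * composing a credit-scheduler event ID with TRC_SCHED_CLASS_EVT. The
+ * event number 1 is arbitrary.
+ */
+#if 0
+#define EXAMPLE_CSCHED_TICK TRC_SCHED_CLASS_EVT(CSCHED, 1)
+/* expands to TRC_SCHED_CLASS | (TRC_SCHED_CSCHED << 9) | 1 == 0x00022001 */
+#endif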
+
+/* Trace classes for Hardware */
+#define TRC_HW_PM 0x00801000 /* Power management traces */
+#define TRC_HW_IRQ 0x00802000 /* Traces relating to the handling of IRQs */
+
+/* Trace events per class */
+#define TRC_LOST_RECORDS (TRC_GEN + 1)
+#define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2)
+#define TRC_TRACE_CPU_CHANGE (TRC_GEN + 3)
+
+#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1)
+#define TRC_SCHED_CONTINUE_RUNNING (TRC_SCHED_MIN + 2)
+#define TRC_SCHED_DOM_ADD (TRC_SCHED_VERBOSE + 1)
+#define TRC_SCHED_DOM_REM (TRC_SCHED_VERBOSE + 2)
+#define TRC_SCHED_SLEEP (TRC_SCHED_VERBOSE + 3)
+#define TRC_SCHED_WAKE (TRC_SCHED_VERBOSE + 4)
+#define TRC_SCHED_YIELD (TRC_SCHED_VERBOSE + 5)
+#define TRC_SCHED_BLOCK (TRC_SCHED_VERBOSE + 6)
+#define TRC_SCHED_SHUTDOWN (TRC_SCHED_VERBOSE + 7)
+#define TRC_SCHED_CTL (TRC_SCHED_VERBOSE + 8)
+#define TRC_SCHED_ADJDOM (TRC_SCHED_VERBOSE + 9)
+#define TRC_SCHED_SWITCH (TRC_SCHED_VERBOSE + 10)
+#define TRC_SCHED_S_TIMER_FN (TRC_SCHED_VERBOSE + 11)
+#define TRC_SCHED_T_TIMER_FN (TRC_SCHED_VERBOSE + 12)
+#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED_VERBOSE + 13)
+#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14)
+#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15)
+#define TRC_SCHED_SHUTDOWN_CODE (TRC_SCHED_VERBOSE + 16)
+
+#define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1)
+#define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2)
+#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3)
+#define TRC_MEM_SET_P2M_ENTRY (TRC_MEM + 4)
+#define TRC_MEM_DECREASE_RESERVATION (TRC_MEM + 5)
+#define TRC_MEM_POD_POPULATE (TRC_MEM + 16)
+#define TRC_MEM_POD_ZERO_RECLAIM (TRC_MEM + 17)
+#define TRC_MEM_POD_SUPERPAGE_SPLINTER (TRC_MEM + 18)
+
+#define TRC_PV_ENTRY 0x00201000 /* Hypervisor entry points for PV guests. */
+#define TRC_PV_SUBCALL 0x00202000 /* Sub-call in a multicall hypercall */
+
+#define TRC_PV_HYPERCALL (TRC_PV_ENTRY + 1)
+#define TRC_PV_TRAP (TRC_PV_ENTRY + 3)
+#define TRC_PV_PAGE_FAULT (TRC_PV_ENTRY + 4)
+#define TRC_PV_FORCED_INVALID_OP (TRC_PV_ENTRY + 5)
+#define TRC_PV_EMULATE_PRIVOP (TRC_PV_ENTRY + 6)
+#define TRC_PV_EMULATE_4GB (TRC_PV_ENTRY + 7)
+#define TRC_PV_MATH_STATE_RESTORE (TRC_PV_ENTRY + 8)
+#define TRC_PV_PAGING_FIXUP (TRC_PV_ENTRY + 9)
+#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV_ENTRY + 10)
+#define TRC_PV_PTWR_EMULATION (TRC_PV_ENTRY + 11)
+#define TRC_PV_PTWR_EMULATION_PAE (TRC_PV_ENTRY + 12)
+#define TRC_PV_HYPERCALL_V2 (TRC_PV_ENTRY + 13)
+#define TRC_PV_HYPERCALL_SUBCALL (TRC_PV_SUBCALL + 14)
+
+/*
+ * TRC_PV_HYPERCALL_V2 format
+ *
+ * Only some of the hypercall arguments are recorded. Bit fields A0 to
+ * A5 in the first extra word are set if the argument is present and
+ * the arguments themselves are packed sequentially in the following
+ * words.
+ *
+ * The TRC_64_FLAG bit is not set for these events (even if there are
+ * 64-bit arguments in the record).
+ *
+ * Word
+ * 0 bit 31 30|29 28|27 26|25 24|23 22|21 20|19 ... 0
+ * A5 |A4 |A3 |A2 |A1 |A0 |Hypercall op
+ * 1 First 32 bit (or low word of first 64 bit) arg in record
+ * 2 Second 32 bit (or high word of first 64 bit) arg in record
+ * ...
+ *
+ * A0-A5 bitfield values:
+ *
+ * 00b Argument not present
+ * 01b 32-bit argument present
+ * 10b 64-bit argument present
+ * 11b Reserved
+ */
+#define TRC_PV_HYPERCALL_V2_ARG_32(i) (0x1 << (20 + 2*(i)))
+#define TRC_PV_HYPERCALL_V2_ARG_64(i) (0x2 << (20 + 2*(i)))
+#define TRC_PV_HYPERCALL_V2_ARG_MASK (0xfff00000)
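+
+/*
+ * Illustrative sketch (editor's addition, not part of the Xen header):
+ * decoding the A0-A5 bit fields of a TRC_PV_HYPERCALL_V2 record. 'w0' is
+ * word 0 of the record data (which also holds the hypercall op); returns
+ * the recorded width (0, 32 or 64 bits) of argument i.
+ */
+#if 0
+static int
+hcall_arg_width(uint32_t w0, int i)
+{
+    switch ((w0 >> (20 + 2*i)) & 0x3) {
+    case 0x1: return 32;
+    case 0x2: return 64;
+    default:  return 0;     /* absent (00b) or reserved (11b) */
+    }
+}
+#endif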
+
+#define TRC_SHADOW_NOT_SHADOW (TRC_SHADOW + 1)
+#define TRC_SHADOW_FAST_PROPAGATE (TRC_SHADOW + 2)
+#define TRC_SHADOW_FAST_MMIO (TRC_SHADOW + 3)
+#define TRC_SHADOW_FALSE_FAST_PATH (TRC_SHADOW + 4)
+#define TRC_SHADOW_MMIO (TRC_SHADOW + 5)
+#define TRC_SHADOW_FIXUP (TRC_SHADOW + 6)
+#define TRC_SHADOW_DOMF_DYING (TRC_SHADOW + 7)
+#define TRC_SHADOW_EMULATE (TRC_SHADOW + 8)
+#define TRC_SHADOW_EMULATE_UNSHADOW_USER (TRC_SHADOW + 9)
+#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ (TRC_SHADOW + 10)
+#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
+#define TRC_SHADOW_WRMAP_BF (TRC_SHADOW + 12)
+#define TRC_SHADOW_PREALLOC_UNPIN (TRC_SHADOW + 13)
+#define TRC_SHADOW_RESYNC_FULL (TRC_SHADOW + 14)
+#define TRC_SHADOW_RESYNC_ONLY (TRC_SHADOW + 15)
+
+/* trace events per subclass */
+#define TRC_HVM_NESTEDFLAG (0x400)
+#define TRC_HVM_VMENTRY (TRC_HVM_ENTRYEXIT + 0x01)
+#define TRC_HVM_VMEXIT (TRC_HVM_ENTRYEXIT + 0x02)
+#define TRC_HVM_VMEXIT64 (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02)
+#define TRC_HVM_PF_XEN (TRC_HVM_HANDLER + 0x01)
+#define TRC_HVM_PF_XEN64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x01)
+#define TRC_HVM_PF_INJECT (TRC_HVM_HANDLER + 0x02)
+#define TRC_HVM_PF_INJECT64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x02)
+#define TRC_HVM_INJ_EXC (TRC_HVM_HANDLER + 0x03)
+#define TRC_HVM_INJ_VIRQ (TRC_HVM_HANDLER + 0x04)
+#define TRC_HVM_REINJ_VIRQ (TRC_HVM_HANDLER + 0x05)
+#define TRC_HVM_IO_READ (TRC_HVM_HANDLER + 0x06)
+#define TRC_HVM_IO_WRITE (TRC_HVM_HANDLER + 0x07)
+#define TRC_HVM_CR_READ (TRC_HVM_HANDLER + 0x08)
+#define TRC_HVM_CR_READ64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x08)
+#define TRC_HVM_CR_WRITE (TRC_HVM_HANDLER + 0x09)
+#define TRC_HVM_CR_WRITE64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x09)
+#define TRC_HVM_DR_READ (TRC_HVM_HANDLER + 0x0A)
+#define TRC_HVM_DR_WRITE (TRC_HVM_HANDLER + 0x0B)
+#define TRC_HVM_MSR_READ (TRC_HVM_HANDLER + 0x0C)
+#define TRC_HVM_MSR_WRITE (TRC_HVM_HANDLER + 0x0D)
+#define TRC_HVM_CPUID (TRC_HVM_HANDLER + 0x0E)
+#define TRC_HVM_INTR (TRC_HVM_HANDLER + 0x0F)
+#define TRC_HVM_NMI (TRC_HVM_HANDLER + 0x10)
+#define TRC_HVM_SMI (TRC_HVM_HANDLER + 0x11)
+#define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12)
+#define TRC_HVM_HLT (TRC_HVM_HANDLER + 0x13)
+#define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14)
+#define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14)
+#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15)
+#define TRC_HVM_IOPORT_READ (TRC_HVM_HANDLER + 0x16)
+#define TRC_HVM_IOMEM_READ (TRC_HVM_HANDLER + 0x17)
+#define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18)
+#define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19)
+#define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19)
+#define TRC_HVM_RDTSC (TRC_HVM_HANDLER + 0x1a)
+#define TRC_HVM_INTR_WINDOW (TRC_HVM_HANDLER + 0x20)
+#define TRC_HVM_NPF (TRC_HVM_HANDLER + 0x21)
+#define TRC_HVM_REALMODE_EMULATE (TRC_HVM_HANDLER + 0x22)
+#define TRC_HVM_TRAP (TRC_HVM_HANDLER + 0x23)
+#define TRC_HVM_TRAP_DEBUG (TRC_HVM_HANDLER + 0x24)
+#define TRC_HVM_VLAPIC (TRC_HVM_HANDLER + 0x25)
+
+#define TRC_HVM_IOPORT_WRITE (TRC_HVM_HANDLER + 0x216)
+#define TRC_HVM_IOMEM_WRITE (TRC_HVM_HANDLER + 0x217)
+
+/* Trace events for power management */
+#define TRC_PM_FREQ_CHANGE (TRC_HW_PM + 0x01)
+#define TRC_PM_IDLE_ENTRY (TRC_HW_PM + 0x02)
+#define TRC_PM_IDLE_EXIT (TRC_HW_PM + 0x03)
+
+/* Trace events for IRQs */
+#define TRC_HW_IRQ_MOVE_CLEANUP_DELAY (TRC_HW_IRQ + 0x1)
+#define TRC_HW_IRQ_MOVE_CLEANUP (TRC_HW_IRQ + 0x2)
+#define TRC_HW_IRQ_BIND_VECTOR (TRC_HW_IRQ + 0x3)
+#define TRC_HW_IRQ_CLEAR_VECTOR (TRC_HW_IRQ + 0x4)
+#define TRC_HW_IRQ_MOVE_FINISH (TRC_HW_IRQ + 0x5)
+#define TRC_HW_IRQ_ASSIGN_VECTOR (TRC_HW_IRQ + 0x6)
+#define TRC_HW_IRQ_UNMAPPED_VECTOR (TRC_HW_IRQ + 0x7)
+#define TRC_HW_IRQ_HANDLED (TRC_HW_IRQ + 0x8)
+
+/*
+ * Event Flags
+ *
+ * Some events (e.g., TRC_PV_TRAP and TRC_HVM_IOMEM_READ) have multiple
+ * record formats. These event flags distinguish between the
+ * different formats.
+ */
+#define TRC_64_FLAG 0x100 /* Addresses are 64 bits (instead of 32 bits) */
+
+/* This structure represents a single trace buffer record. */
+struct t_rec {
+ uint32_t event:28;
+ uint32_t extra_u32:3; /* # entries in trailing extra_u32[] array */
+ uint32_t cycles_included:1; /* u.cycles or u.no_cycles? */
+ union {
+ struct {
+ uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */
+ uint32_t extra_u32[7]; /* event data items */
+ } cycles;
+ struct {
+ uint32_t extra_u32[7]; /* event data items */
+ } nocycles;
+ } u;
+};
+
+/*
+ * This structure contains the metadata for a single trace buffer. The cons
+ * and prod fields index into an array of struct t_rec's.
+ */
+struct t_buf {
+ /* Assume the data buffer size is X. X is generally not a power of 2.
+ * CONS and PROD are incremented modulo (2*X):
+ * 0 <= cons < 2*X
+ * 0 <= prod < 2*X
+ * This is done because addition modulo X breaks at 2^32 when X is not a
+ * power of 2:
+ * (((2^32 - 1) % X) + 1) % X != (2^32) % X
+ */
+ uint32_t cons; /* Offset of next item to be consumed by control tools. */
+ uint32_t prod; /* Offset of next item to be produced by Xen. */
+ /* Records follow immediately after the meta-data header. */
+};
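+
+/*
+ * Illustrative sketch (editor's addition, not part of the Xen header):
+ * locating the next unconsumed record under the modulo-2X scheme above.
+ * 'data' points just past the t_buf header and 'X' is the data area size
+ * in bytes; the consumer then advances cons by the record's size,
+ * wrapping at 2*X.
+ */
+#if 0
+static struct t_rec *
+trace_next_rec(struct t_buf *buf, char *data, uint32_t X)
+{
+    uint32_t off;
+
+    if (buf->cons == buf->prod)
+        return 0;           /* buffer empty */
+    off = buf->cons;
+    if (off >= X)
+        off -= X;           /* cons runs modulo 2*X, data is X bytes */
+    return (struct t_rec *)(data + off);
+}
+#endif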
+
+/* Structure used to pass the MFNs of the trace buffers back to trace consumers.
+ * mfn_offset[cpu] is an offset into the mapped structure where that CPU's MFN list is held.
+ * MFNs will be at ((unsigned long *)(t_info))+(t_info->mfn_offset[cpu]).
+ */
+struct t_info {
+ uint16_t tbuf_size; /* Size in pages of each trace buffer */
+ uint16_t mfn_offset[]; /* Offset within t_info structure of the page list per cpu */
+ /* MFN lists immediately after the header */
+};
+
+#endif /* __XEN_PUBLIC_TRACE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/vcpu.h
@@ -1,0 +1,240 @@
+/******************************************************************************
+ * vcpu.h
+ *
+ * VCPU initialisation, query, and hotplug.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_VCPU_H__
+#define __XEN_PUBLIC_VCPU_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ * int vcpu_op(int cmd, int vcpuid, void *extra_args)
+ * @cmd == VCPUOP_??? (VCPU operation).
+ * @vcpuid == VCPU to operate on.
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Initialise a VCPU. Each VCPU can be initialised only once. A
+ * newly-initialised VCPU will not run until it is brought up by VCPUOP_up.
+ *
+ * @extra_arg == pointer to vcpu_guest_context structure containing initial
+ * state for the VCPU.
+ */
+#define VCPUOP_initialise 0
+
+/*
+ * Bring up a VCPU. This makes the VCPU runnable. This operation will fail
+ * if the VCPU has not been initialised (VCPUOP_initialise).
+ */
+#define VCPUOP_up 1
+
+/*
+ * Bring down a VCPU (i.e., make it non-runnable).
+ * There are a few caveats that callers should observe:
+ * 1. This operation may return, and VCPU_is_up may return false, before the
+ * VCPU stops running (i.e., the command is asynchronous). It is a good
+ * idea to ensure that the VCPU has entered a non-critical loop before
+ * bringing it down. Alternatively, this operation is guaranteed
+ * synchronous if invoked by the VCPU itself.
+ * 2. After a VCPU is initialised, there is currently no way to drop all its
+ * references to domain memory. Even a VCPU that is down still holds
+ * memory references via its pagetable base pointer and GDT. It is good
+ * practice to move a VCPU onto an 'idle' or default page table, LDT and
+ * GDT before bringing it down.
+ */
+#define VCPUOP_down 2
+
+/* Returns 1 if the given VCPU is up. */
+#define VCPUOP_is_up 3
+
+/*
+ * Return information about the state and running time of a VCPU.
+ * @extra_arg == pointer to vcpu_runstate_info structure.
+ */
+#define VCPUOP_get_runstate_info 4
+struct vcpu_runstate_info {
+ /* VCPU's current state (RUNSTATE_*). */
+ int state;
+ /* When was current state entered (system time, ns)? */
+ uint64_t state_entry_time;
+ /*
+ * Time spent in each RUNSTATE_* (ns). The sum of these times is
+ * guaranteed not to drift from system time.
+ */
+ uint64_t time[4];
+};
+typedef struct vcpu_runstate_info vcpu_runstate_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t);
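+
+/*
+ * Illustrative sketch (editor's addition, not part of the Xen header):
+ * querying a VCPU's runstate, assuming a HYPERVISOR_vcpu_op(cmd, vcpuid,
+ * extra_args) wrapper in the style of the other HYPERVISOR_* wrappers in
+ * this port.
+ */
+#if 0
+static int
+vcpu_is_running(int vcpuid)
+{
+    struct vcpu_runstate_info ri;
+
+    if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, vcpuid, &ri) < 0)
+        return -1;
+    return ri.state == RUNSTATE_running;
+}
+#endif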
+
+/* VCPU is currently running on a physical CPU. */
+#define RUNSTATE_running 0
+
+/* VCPU is runnable, but not currently scheduled on any physical CPU. */
+#define RUNSTATE_runnable 1
+
+/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
+#define RUNSTATE_blocked 2
+
+/*
+ * VCPU is not runnable, but it is not blocked.
+ * This is a 'catch all' state for things like hotplug and pauses by the
+ * system administrator (or for critical sections in the hypervisor).
+ * RUNSTATE_blocked dominates this state (it is the preferred state).
+ */
+#define RUNSTATE_offline 3
+
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ * 1. The registered address may be a virtual address, a physical address, or
+ *    a guest handle, depending on the platform. A virtual address or guest
+ *    handle should be registered on x86 systems.
+ * 2. Only one shared area may be registered per VCPU. The shared area is
+ * updated by the hypervisor each time the VCPU is scheduled. Thus
+ * runstate.state will always be RUNSTATE_running and
+ * runstate.state_entry_time will indicate the system time at which the
+ * VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_memory_area 5
+struct vcpu_register_runstate_memory_area {
+ union {
+ XEN_GUEST_HANDLE(vcpu_runstate_info_t) h;
+ struct vcpu_runstate_info *v;
+ uint64_t p;
+ } addr;
+};
+typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t);
+
+/*
+ * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer
+ * which can be set via these commands. Periods smaller than one millisecond
+ * may not be supported.
+ */
+#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */
+#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */
+struct vcpu_set_periodic_timer {
+ uint64_t period_ns;
+};
+typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);
+
+/*
+ * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
+ * timer which can be set via these commands.
+ */
+#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */
+#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */
+struct vcpu_set_singleshot_timer {
+ uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */
+ uint32_t flags; /* VCPU_SSHOTTMR_??? */
+};
+typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
+
+/* Flags to VCPUOP_set_singleshot_timer. */
+ /* Require the timeout to be in the future (return -ETIME if it's passed). */
+#define _VCPU_SSHOTTMR_future (0)
+#define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future)
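+
+/*
+ * Illustrative sketch (editor's addition, not part of the Xen header):
+ * arming a one-shot timer 'delta' nanoseconds past 'now' (both in Xen
+ * system time), again assuming a HYPERVISOR_vcpu_op wrapper.
+ */
+#if 0
+static int
+arm_oneshot(int vcpuid, uint64_t now, uint64_t delta)
+{
+    struct vcpu_set_singleshot_timer t;
+
+    t.timeout_abs_ns = now + delta;
+    t.flags = VCPU_SSHOTTMR_future; /* fail with -ETIME if already past */
+    return HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, vcpuid, &t);
+}
+#endif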
+
+/*
+ * Register a memory location in the guest address space for the
+ * vcpu_info structure. This allows the guest to place the vcpu_info
+ * structure in a convenient place, such as in a per-cpu data area.
+ * The pointer need not be page aligned, but the structure must not
+ * cross a page boundary.
+ *
+ * This may be called only once per vcpu.
+ */
+#define VCPUOP_register_vcpu_info 10 /* arg == vcpu_register_vcpu_info_t */
+struct vcpu_register_vcpu_info {
+ uint64_t mfn; /* mfn of page to place vcpu_info */
+ uint32_t offset; /* offset within page */
+ uint32_t rsvd; /* unused */
+};
+typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
+
+/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
+#define VCPUOP_send_nmi 11
+
+/*
+ * Get the physical ID information for a pinned vcpu's underlying physical
+ * processor. The physical ID information is architecture-specific.
+ * On x86: id[31:0]=apic_id, id[63:32]=acpi_id.
+ * This command returns -EINVAL if it is not a valid operation for this VCPU.
+ */
+#define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */
+struct vcpu_get_physid {
+ uint64_t phys_id;
+};
+typedef struct vcpu_get_physid vcpu_get_physid_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t);
+#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid))
+#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32))
+
+/*
+ * Register a memory location to get a secondary copy of the vcpu time
+ * parameters. The master copy still exists as part of the vcpu shared
+ * memory area, and this secondary copy is updated whenever the master copy
+ * is updated (and using the same versioning scheme for synchronisation).
+ *
+ * The intent is that this copy may be mapped (RO) into userspace so
+ * that usermode can compute system time using the time info and the
+ * tsc. Usermode will see an array of vcpu_time_info structures, one
+ * for each vcpu, and choose the right one by an existing mechanism
+ * which allows it to get the current vcpu number (such as via a
+ * segment limit). It can then apply the normal algorithm to compute
+ * system time from the tsc.
+ *
+ * @extra_arg == pointer to vcpu_register_time_info_memory_area structure.
+ */
+#define VCPUOP_register_vcpu_time_memory_area 13
+DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t);
+struct vcpu_register_time_memory_area {
+ union {
+ XEN_GUEST_HANDLE(vcpu_time_info_t) h;
+ struct vcpu_time_info *v;
+ uint64_t p;
+ } addr;
+};
+typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
+
+#endif /* __XEN_PUBLIC_VCPU_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/version.h
@@ -1,0 +1,96 @@
+/******************************************************************************
+ * version.h
+ *
+ * Xen version, type, and compile information.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Nguyen Anh Quynh <[email protected]>
+ * Copyright (c) 2005, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_VERSION_H__
+#define __XEN_PUBLIC_VERSION_H__
+
+#include "xen.h"
+
+/* NB. All ops return zero on success, except XENVER_{version,pagesize} */
+
+/* arg == NULL; returns major:minor (16:16). */
+#define XENVER_version 0
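+
+/*
+ * Illustrative sketch (editor's addition, not part of the Xen header):
+ * unpacking the major:minor (16:16) result, assuming a
+ * HYPERVISOR_xen_version(cmd, arg) wrapper.
+ */
+#if 0
+static void
+print_xen_version(void)
+{
+    int v = HYPERVISOR_xen_version(XENVER_version, 0);
+    print("xen %d.%d\n", (v >> 16) & 0xffff, v & 0xffff);
+}
+#endif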
+
+/* arg == xen_extraversion_t. */
+#define XENVER_extraversion 1
+typedef char xen_extraversion_t[16];
+#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t))
+
+/* arg == xen_compile_info_t. */
+#define XENVER_compile_info 2
+struct xen_compile_info {
+ char compiler[64];
+ char compile_by[16];
+ char compile_domain[32];
+ char compile_date[32];
+};
+typedef struct xen_compile_info xen_compile_info_t;
+
+#define XENVER_capabilities 3
+typedef char xen_capabilities_info_t[1024];
+#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t))
+
+#define XENVER_changeset 4
+typedef char xen_changeset_info_t[64];
+#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
+
+#define XENVER_platform_parameters 5
+struct xen_platform_parameters {
+ xen_ulong_t virt_start;
+};
+typedef struct xen_platform_parameters xen_platform_parameters_t;
+
+#define XENVER_get_features 6
+struct xen_feature_info {
+ unsigned int submap_idx; /* IN: which 32-bit submap to return */
+ uint32_t submap; /* OUT: 32-bit submap */
+};
+typedef struct xen_feature_info xen_feature_info_t;
+
+/* Declares the features reported by XENVER_get_features. */
+#include "features.h"
+
+/* arg == NULL; returns host memory page size. */
+#define XENVER_pagesize 7
+
+/* arg == xen_domain_handle_t. */
+#define XENVER_guest_handle 8
+
+#define XENVER_commandline 9
+typedef char xen_commandline_t[1024];
+
+#endif /* __XEN_PUBLIC_VERSION_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/xen-compat.h
@@ -1,0 +1,44 @@
+/******************************************************************************
+ * xen-compat.h
+ *
+ * Guest OS interface to Xen. Compatibility layer.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Christian Limpach
+ */
+
+#ifndef __XEN_PUBLIC_XEN_COMPAT_H__
+#define __XEN_PUBLIC_XEN_COMPAT_H__
+
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040300
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+/* Xen is built with matching headers and implements the latest interface. */
+#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__
+#elif !defined(__XEN_INTERFACE_VERSION__)
+/* Guests which do not specify a version get the legacy interface. */
+#define __XEN_INTERFACE_VERSION__ 0x00000000
+#endif
+
+#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__
+#error "These header files do not support the requested interface version."
+#endif
+
+#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/xen.h
@@ -1,0 +1,890 @@
+/******************************************************************************
+ * xen.h
+ *
+ * Guest OS interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_XEN_H__
+#define __XEN_PUBLIC_XEN_H__
+
+#include "xen-compat.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "arch-x86/xen.h"
+#elif defined(__arm__) || defined (__aarch64__)
+#include "arch-arm.h"
+#else
+#error "Unsupported architecture"
+#endif
+
+#ifndef __ASSEMBLY__
+/* Guest handles for primitive C types. */
+DEFINE_XEN_GUEST_HANDLE(char);
+__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
+DEFINE_XEN_GUEST_HANDLE(int);
+__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int);
+#if __XEN_INTERFACE_VERSION__ < 0x00040300
+DEFINE_XEN_GUEST_HANDLE(long);
+__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+#endif
+DEFINE_XEN_GUEST_HANDLE(void);
+
+DEFINE_XEN_GUEST_HANDLE(uint64_t);
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
+DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
+#endif
+
+/*
+ * HYPERCALLS
+ */
+
+/* `incontents 100 hcalls List of hypercalls
+ * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*()
+ */
+
+#define __HYPERVISOR_set_trap_table 0
+#define __HYPERVISOR_mmu_update 1
+#define __HYPERVISOR_set_gdt 2
+#define __HYPERVISOR_stack_switch 3
+#define __HYPERVISOR_set_callbacks 4
+#define __HYPERVISOR_fpu_taskswitch 5
+#define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */
+#define __HYPERVISOR_platform_op 7
+#define __HYPERVISOR_set_debugreg 8
+#define __HYPERVISOR_get_debugreg 9
+#define __HYPERVISOR_update_descriptor 10
+#define __HYPERVISOR_memory_op 12
+#define __HYPERVISOR_multicall 13
+#define __HYPERVISOR_update_va_mapping 14
+#define __HYPERVISOR_set_timer_op 15
+#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */
+#define __HYPERVISOR_xen_version 17
+#define __HYPERVISOR_console_io 18
+#define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */
+#define __HYPERVISOR_grant_table_op 20
+#define __HYPERVISOR_vm_assist 21
+#define __HYPERVISOR_update_va_mapping_otherdomain 22
+#define __HYPERVISOR_iret 23 /* x86 only */
+#define __HYPERVISOR_vcpu_op 24
+#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op 26
+#define __HYPERVISOR_xsm_op 27
+#define __HYPERVISOR_nmi_op 28
+#define __HYPERVISOR_sched_op 29
+#define __HYPERVISOR_callback_op 30
+#define __HYPERVISOR_xenoprof_op 31
+#define __HYPERVISOR_event_channel_op 32
+#define __HYPERVISOR_physdev_op 33
+#define __HYPERVISOR_hvm_op 34
+#define __HYPERVISOR_sysctl 35
+#define __HYPERVISOR_domctl 36
+#define __HYPERVISOR_kexec_op 37
+#define __HYPERVISOR_tmem_op 38
+#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */
+
+/* Architecture-specific hypercall definitions. */
+#define __HYPERVISOR_arch_0 48
+#define __HYPERVISOR_arch_1 49
+#define __HYPERVISOR_arch_2 50
+#define __HYPERVISOR_arch_3 51
+#define __HYPERVISOR_arch_4 52
+#define __HYPERVISOR_arch_5 53
+#define __HYPERVISOR_arch_6 54
+#define __HYPERVISOR_arch_7 55
+
+/* ` } */
+
+/*
+ * HYPERCALL COMPATIBILITY.
+ */
+
+/* New sched_op hypercall introduced in 0x00030101. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030101
+#undef __HYPERVISOR_sched_op
+#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat
+#endif
+
+/* New event-channel and physdev hypercalls introduced in 0x00030202. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030202
+#undef __HYPERVISOR_event_channel_op
+#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat
+#undef __HYPERVISOR_physdev_op
+#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat
+#endif
+
+/* New platform_op hypercall introduced in 0x00030204. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030204
+#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op
+#endif
+
+/*
+ * VIRTUAL INTERRUPTS
+ *
+ * Virtual interrupts that a guest OS may receive from Xen.
+ *
+ * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a
+ * global VIRQ. The former can be bound once per VCPU and cannot be re-bound.
+ * The latter can be allocated only once per guest: they must initially be
+ * allocated to VCPU0 but can subsequently be re-bound.
+ */
+/* ` enum virq { */
+#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */
+#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */
+#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */
+#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */
+#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */
+#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */
+#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */
+#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */
+#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */
+#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occurred */
+#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
+#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
+
+/* Architecture-specific VIRQ definitions. */
+#define VIRQ_ARCH_0 16
+#define VIRQ_ARCH_1 17
+#define VIRQ_ARCH_2 18
+#define VIRQ_ARCH_3 19
+#define VIRQ_ARCH_4 20
+#define VIRQ_ARCH_5 21
+#define VIRQ_ARCH_6 22
+#define VIRQ_ARCH_7 23
+/* ` } */
+
+#define NR_VIRQS 24
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[],
+ * ` unsigned count, unsigned *done_out,
+ * ` unsigned foreigndom)
+ * `
+ * @reqs is an array of mmu_update_t structures ((ptr, val) pairs).
+ * @count is the length of the above array.
+ * @done_out is an output parameter indicating the number of completed operations
+ * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this
+ * hypercall invocation. Can be DOMID_SELF.
+ * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced
+ * in this hypercall invocation. The value of this field
+ * (x) encodes the PFD as follows:
+ * x == 0 => PFD == DOMID_SELF
+ * x != 0 => PFD == x - 1
+ *
+ * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command.
+ * -------------
+ * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
+ * Updates an entry in a page table belonging to PFD. If updating an L1 table,
+ * and the new table entry is valid/present, the mapped frame must belong to
+ * FD. If attempting to map an I/O page then the caller assumes the privilege
+ * of the FD.
+ * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
+ * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
+ * ptr[:2] -- Machine address of the page-table entry to modify.
+ * val -- Value to write.
+ *
+ * There are also certain implicit requirements when using this hypercall. The
+ * pages that make up a pagetable must be mapped read-only in the guest.
+ * This prevents uncontrolled guest updates to the pagetable. Xen strictly
+ * enforces this, and will disallow any pagetable update which would end up
+ * mapping a pagetable page RW, and will disallow using any writable page as a
+ * pagetable. In practice it means that when constructing a page table for a
+ * process, thread, etc, we MUST be very diligent in following these rules:
+ * 1). Start with top-level page (PGD or in Xen language: L4). Fill out
+ * the entries.
+ * 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD
+ * or L2).
+ * 3). Start filling out the PTE table (L1) with the PTE entries. Once
+ * done, make sure to set each of those entries to RO (so writeable bit
+ * is unset). Once that has been completed, set the PMD (L2) for this
+ * PTE table as RO.
+ * 4). When completed with all of the PMD (L2) entries, and all of them have
+ * been set to RO, make sure to set RO the PUD (L3). Do the same
+ * operation on PGD (L4) pagetable entries that have a PUD (L3) entry.
+ * 5). Now before you can use those pages (so setting the cr3), you MUST also
+ * pin them so that the hypervisor can verify the entries. This is done
+ * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame
+ * number of the PGD (L4)). At that point the HYPERVISOR_mmuext_op(
+ * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be
+ * issued.
+ * For 32-bit guests, the L4 is not used (as there are fewer pagetable levels), so
+ * instead use L3.
+ * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE
+ * hypercall. If so desired, the OS can also try to write to the PTE
+ * and be trapped by the hypervisor (as the PTE entry is RO).
+ *
+ * To deallocate the pages, the operations are the reverse of the steps
+ * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the
+ * pagetable MUST not be in use (meaning that the cr3 is not set to it).
+ *
+ * ptr[1:0] == MMU_MACHPHYS_UPDATE:
+ * Updates an entry in the machine->pseudo-physical mapping table.
+ * ptr[:2] -- Machine address within the frame whose mapping to modify.
+ * The frame must belong to the FD, if one is specified.
+ * val -- Value to write into the mapping entry.
+ *
+ * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
+ * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
+ * with those in @val.
+ *
+ * @val is usually the machine frame number along with some attributes.
+ * The attributes by default follow the architecture-defined bits: if this is
+ * an x86-64 machine and the four-level page table layout is used, the layout
+ * of val is:
+ * - 63 if set means No execute (NX)
+ * - 46-13 the machine frame number
+ * - 12 available for guest
+ * - 11 available for guest
+ * - 10 available for guest
+ * - 9 available for guest
+ * - 8 global
+ * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages)
+ * - 6 dirty
+ * - 5 accessed
+ * - 4 page cached disabled
+ * - 3 page write through
+ * - 2 userspace accessible
+ * - 1 writeable
+ * - 0 present
+ *
+ * The one bit that does not fit with the default layout is PAGE_PSE
+ * (also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to
+ * HYPERVISOR_mmuext_op serve as the mechanism to set a pagetable entry to
+ * cover 4MB (or 2MB) instead of using the PAGE_PSE bit.
+ *
+ * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen
+ * using it as the Page Attribute Table (PAT) bit - for details on it please
+ * refer to Intel SDM 10.12. The PAT allows setting the caching attributes of
+ * pages instead of using MTRRs.
+ *
+ * The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits):
+ * PAT4 PAT0
+ * +-----+-----+----+----+----+-----+----+----+
+ * | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux
+ * +-----+-----+----+----+----+-----+----+----+
+ * | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots)
+ * +-----+-----+----+----+----+-----+----+----+
+ * | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen
+ * +-----+-----+----+----+----+-----+----+----+
+ *
+ * The lookup of this index table translates to looking up
+ * Bit 7, Bit 4, and Bit 3 of val entry:
+ *
+ * PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3).
+ *
+ * If all bits are off, then we are using PAT0. If bit 3 is turned on,
+ * then we are using PAT1; if bits 3 and 4 are both on, then PAT3, and so on.
+ *
+ * As you can see, Linux's PAT1 translates to PAT4 under Xen. This means that
+ * a guest that follows Linux's PAT setup and would like to set Write
+ * Combined on pages MUST use the PAT4 entry, i.e. have Bit 7 (PAGE_PAT)
+ * set. For example, Linux only uses PAT0, PAT1, and PAT2 for the
+ * caching as:
+ *
+ * WB = none (so PAT0)
+ * WC = PWT (bit 3 on)
+ * UC = PWT | PCD (bit 3 and 4 are on).
+ *
+ * To make it work with Xen, it needs to translate the WC bit like so:
+ *
+ * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3
+ *
+ * And to translate back it would:
+ *
+ * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7.
+ */
+#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
+#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */
+#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */
+
+/*
+ * MMU EXTENDED OPERATIONS
+ *
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_mmuext_op(mmuext_op_t uops[],
+ * ` unsigned int count,
+ * ` unsigned int *pdone,
+ * ` unsigned int foreigndom)
+ */
+/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
+ * Where the FD has some effect, it is described below.
+ *
+ * cmd: MMUEXT_(UN)PIN_*_TABLE
+ * mfn: Machine frame number to be (un)pinned as a p.t. page.
+ * The frame must belong to the FD, if one is specified.
+ *
+ * cmd: MMUEXT_NEW_BASEPTR
+ * mfn: Machine frame number of new page-table base to install in MMU.
+ *
+ * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only]
+ * mfn: Machine frame number of new page-table base to install in MMU
+ * when in user space.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_LOCAL
+ * No additional arguments. Flushes local TLB.
+ *
+ * cmd: MMUEXT_INVLPG_LOCAL
+ * linear_addr: Linear address to be flushed from the local TLB.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_MULTI
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
+ *
+ * cmd: MMUEXT_INVLPG_MULTI
+ * linear_addr: Linear address to be flushed.
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_ALL
+ * No additional arguments. Flushes all VCPUs' TLBs.
+ *
+ * cmd: MMUEXT_INVLPG_ALL
+ * linear_addr: Linear address to be flushed from all VCPUs' TLBs.
+ *
+ * cmd: MMUEXT_FLUSH_CACHE
+ * No additional arguments. Writes back and flushes cache contents.
+ *
+ * cmd: MMUEXT_FLUSH_CACHE_GLOBAL
+ * No additional arguments. Writes back and flushes cache contents
+ * on all CPUs in the system.
+ *
+ * cmd: MMUEXT_SET_LDT
+ * linear_addr: Linear address of LDT base (NB. must be page-aligned).
+ * nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
+ *
+ * cmd: MMUEXT_[UN]MARK_SUPER
+ * mfn: Machine frame number of head of superpage to be [un]marked.
+ */
+/* ` enum mmuext_cmd { */
+#define MMUEXT_PIN_L1_TABLE 0
+#define MMUEXT_PIN_L2_TABLE 1
+#define MMUEXT_PIN_L3_TABLE 2
+#define MMUEXT_PIN_L4_TABLE 3
+#define MMUEXT_UNPIN_TABLE 4
+#define MMUEXT_NEW_BASEPTR 5
+#define MMUEXT_TLB_FLUSH_LOCAL 6
+#define MMUEXT_INVLPG_LOCAL 7
+#define MMUEXT_TLB_FLUSH_MULTI 8
+#define MMUEXT_INVLPG_MULTI 9
+#define MMUEXT_TLB_FLUSH_ALL 10
+#define MMUEXT_INVLPG_ALL 11
+#define MMUEXT_FLUSH_CACHE 12
+#define MMUEXT_SET_LDT 13
+#define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE 16
+#define MMUEXT_COPY_PAGE 17
+#define MMUEXT_FLUSH_CACHE_GLOBAL 18
+#define MMUEXT_MARK_SUPER 19
+#define MMUEXT_UNMARK_SUPER 20
+/* ` } */
+
+#ifndef __ASSEMBLY__
+struct mmuext_op {
+ unsigned int cmd; /* => enum mmuext_cmd */
+ union {
+ /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
+ * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
+ xen_pfn_t mfn;
+ /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
+ unsigned long linear_addr;
+ } arg1;
+ union {
+ /* SET_LDT */
+ unsigned int nr_ents;
+ /* TLB_FLUSH_MULTI, INVLPG_MULTI */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+ XEN_GUEST_HANDLE(const_void) vcpumask;
+#else
+ const void *vcpumask;
+#endif
+ /* COPY_PAGE */
+ xen_pfn_t src_mfn;
+ } arg2;
+};
+typedef struct mmuext_op mmuext_op_t;
+DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
+#endif
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_update_va_mapping(unsigned long va, u64 val,
+ * ` enum uvm_flags flags)
+ * `
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, u64 val,
+ * ` enum uvm_flags flags,
+ * ` domid_t domid)
+ * `
+ * ` @va: The virtual address whose mapping we want to change
+ * ` @val: The new page table entry, must contain a machine address
+ * ` @flags: Control TLB flushes
+ */
+/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
+/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */
+/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */
+/* ` enum uvm_flags { */
+#define UVMF_NONE (0UL<<0) /* No flushing at all. */
+#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */
+#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */
+#define UVMF_FLUSHTYPE_MASK (3UL<<0)
+#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */
+#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */
+#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */
+/* ` } */
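+
+/*
+ * Illustrative sketch (editor's addition, not part of the Xen header):
+ * typical flag combinations, assuming a HYPERVISOR_update_va_mapping(va,
+ * val, flags) wrapper. UVMF_INVLPG|UVMF_LOCAL flushes just this entry
+ * from the local TLB; UVMF_TLB_FLUSH|UVMF_ALL fully flushes every TLB.
+ */
+#if 0
+    HYPERVISOR_update_va_mapping(va, newpte, UVMF_INVLPG | UVMF_LOCAL);
+    HYPERVISOR_update_va_mapping(va, newpte, UVMF_TLB_FLUSH | UVMF_ALL);
+#endif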
+
+/*
+ * Commands to HYPERVISOR_console_io().
+ */
+#define CONSOLEIO_write 0
+#define CONSOLEIO_read 1
+
+/*
+ * Commands to HYPERVISOR_vm_assist().
+ */
+#define VMASST_CMD_enable 0
+#define VMASST_CMD_disable 1
+
+/* x86/32 guests: simulate full 4GB segment limits. */
+#define VMASST_TYPE_4gb_segments 0
+
+/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
+#define VMASST_TYPE_4gb_segments_notify 1
+
+/*
+ * x86 guests: support writes to bottom-level PTEs.
+ * NB1. Page-directory entries cannot be written.
+ * NB2. Guest must continue to remove all writable mappings of PTEs.
+ */
+#define VMASST_TYPE_writable_pagetables 2
+
+/* x86/PAE guests: support PDPTs above 4GB. */
+#define VMASST_TYPE_pae_extended_cr3 3
+
+#define MAX_VMASST_TYPE 3
+
+#ifndef __ASSEMBLY__
+
+typedef uint16_t domid_t;
+
+/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
+#define DOMID_FIRST_RESERVED (0x7FF0U)
+
+/* DOMID_SELF is used in certain contexts to refer to oneself. */
+#define DOMID_SELF (0x7FF0U)
+
+/*
+ * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
+ * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
+ * is useful to ensure that no mappings to the OS's own heap are accidentally
+ * installed. (e.g., in Linux this could cause havoc as reference counts
+ * aren't adjusted on the I/O-mapping code path).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can
+ * be specified by any calling domain.
+ */
+#define DOMID_IO (0x7FF1U)
+
+/*
+ * DOMID_XEN is used to allow privileged domains to map restricted parts of
+ * Xen's heap space (e.g., the machine_to_phys table).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if
+ * the caller is privileged.
+ */
+#define DOMID_XEN (0x7FF2U)
+
+/*
+ * DOMID_COW is used as the owner of sharable pages */
+#define DOMID_COW (0x7FF3U)
+
+/* DOMID_INVALID is used to identify pages with unknown owner. */
+#define DOMID_INVALID (0x7FF4U)
+
+/* Idle domain. */
+#define DOMID_IDLE (0x7FFFU)
+
+/*
+ * Send an array of these to HYPERVISOR_mmu_update().
+ * NB. The fields are natural pointer/address size for this architecture.
+ */
+struct mmu_update {
+ uint64_t ptr; /* Machine address of PTE. */
+ uint64_t val; /* New contents of PTE. */
+};
+typedef struct mmu_update mmu_update_t;
+DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
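+
+/*
+ * Illustrative sketch (editor's addition, not part of the Xen header):
+ * batching two PTE writes through HYPERVISOR_mmu_update (prototype as
+ * documented above). 'ma0' and 'ma1' are machine addresses of PTEs; the
+ * low two bits of ptr carry the MMU_* sub-command.
+ */
+#if 0
+static int
+set_two_ptes(uint64_t ma0, uint64_t v0, uint64_t ma1, uint64_t v1)
+{
+    struct mmu_update u[2];
+    unsigned done;
+
+    u[0].ptr = ma0 | MMU_NORMAL_PT_UPDATE;
+    u[0].val = v0;
+    u[1].ptr = ma1 | MMU_NORMAL_PT_UPDATE;
+    u[1].val = v1;
+    return HYPERVISOR_mmu_update(u, 2, &done, DOMID_SELF);
+}
+#endif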
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_multicall(multicall_entry_t call_list[],
+ * ` unsigned int nr_calls);
+ *
+ * NB. The fields are natural register size for this architecture.
+ */
+struct multicall_entry {
+ unsigned long op, result;
+ unsigned long args[6];
+};
+typedef struct multicall_entry multicall_entry_t;
+DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
+
+/*
+ * Event channel endpoints per domain:
+ * 1024 if a long is 32 bits; 4096 if a long is 64 bits.
+ */
+#define NR_EVENT_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
+
+struct vcpu_time_info {
+ /*
+ * Updates to the following values are preceded and followed by an
+ * increment of 'version'. The guest can therefore detect updates by
+ * looking for changes to 'version'. If the least-significant bit of
+ * the version number is set then an update is in progress and the guest
+ * must wait to read a consistent set of values.
+ * The correct way to interact with the version number is similar to
+ * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry.
+ */
+ uint32_t version;
+ uint32_t pad0;
+ uint64_t tsc_timestamp; /* TSC at last update of time vals. */
+ uint64_t system_time; /* Time, in nanosecs, since boot. */
+ /*
+ * Current system time:
+ * system_time +
+ * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32)
+ * CPU frequency (Hz):
+ * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
+ */
+ uint32_t tsc_to_system_mul;
+ int8_t tsc_shift;
+ int8_t pad1[3];
+}; /* 32 bytes */
+typedef struct vcpu_time_info vcpu_time_info_t;
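+
+/*
+ * Illustrative sketch (editor's addition, not part of the Xen header):
+ * the seqlock-style read protocol described above. Spin while an update
+ * is in progress (odd version) and retry if the version changed during
+ * the copy.
+ */
+#if 0
+static void
+read_time_info(volatile struct vcpu_time_info *src, struct vcpu_time_info *dst)
+{
+    uint32_t v;
+
+    do {
+        while ((v = src->version) & 1)
+            ;                       /* update in progress */
+        *dst = *(struct vcpu_time_info *)src;
+    } while (src->version != v);
+}
+#endif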
+
+struct vcpu_info {
+ /*
+ * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
+ * a pending notification for a particular VCPU. It is then cleared
+ * by the guest OS /before/ checking for pending work, thus avoiding
+ * a set-and-check race. Note that the mask is only accessed by Xen
+ * on the CPU that is currently hosting the VCPU. This means that the
+ * pending and mask flags can be updated by the guest without special
+ * synchronisation (i.e., no need for the x86 LOCK prefix).
+ * This may seem suboptimal because if the pending flag is set by
+ * a different CPU then an IPI may be scheduled even when the mask
+ * is set. However, note:
+ * 1. The task of 'interrupt holdoff' is covered by the per-event-
+ * channel mask bits. A 'noisy' event that is continually being
+ * triggered can be masked at source at this very precise
+ * granularity.
+ * 2. The main purpose of the per-VCPU mask is therefore to restrict
+ * reentrant execution: whether for concurrency control, or to
+ * prevent unbounded stack usage. Whatever the purpose, we expect
+ * that the mask will be asserted only for short periods at a time,
+ * and so the likelihood of a 'spurious' IPI is suitably small.
+ * The mask is read before making an event upcall to the guest: a
+ * non-zero mask therefore guarantees that the VCPU will not receive
+ * an upcall activation. The mask is cleared when the VCPU requests
+ * to block: this avoids wakeup-waiting races.
+ */
+ uint8_t evtchn_upcall_pending;
+ uint8_t evtchn_upcall_mask;
+ xen_ulong_t evtchn_pending_sel;
+ struct arch_vcpu_info arch;
+ struct vcpu_time_info time;
+}; /* 64 bytes (x86) */
+#ifndef __XEN__
+typedef struct vcpu_info vcpu_info_t;
+#endif
+
+/*
+ * `incontents 200 startofday_shared Start-of-day shared data structure
+ * Xen/kernel shared data -- pointer provided in start_info.
+ *
+ * This structure is defined to be both smaller than a page, and the
+ * only data on the shared page, but may vary in actual size even within
+ * compatible Xen versions; guests should not rely on the size
+ * of this structure remaining constant.
+ */
+struct shared_info {
+ struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
+
+ /*
+ * A domain can create "event channels" on which it can send and receive
+ * asynchronous event notifications. There are three classes of event that
+ * are delivered by this mechanism:
+ * 1. Bi-directional inter- and intra-domain connections. Domains must
+ * arrange out-of-band to set up a connection (usually by allocating
+ * an unbound 'listener' port and advertising that via a storage service
+ * such as xenstore).
+ * 2. Physical interrupts. A domain with suitable hardware-access
+ * privileges can bind an event-channel port to a physical interrupt
+ * source.
+ * 3. Virtual interrupts ('events'). A domain can bind an event-channel
+ * port to a virtual interrupt source, such as the virtual-timer
+ * device or the emergency console.
+ *
+ * Event channels are addressed by a "port index". Each channel is
+ * associated with two bits of information:
+ * 1. PENDING -- notifies the domain that there is a pending notification
+ * to be processed. This bit is cleared by the guest.
+ * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING
+ * will cause an asynchronous upcall to be scheduled. This bit is only
+ * updated by the guest. It is read-only within Xen. If a channel
+ * becomes pending while the channel is masked then the 'edge' is lost
+ * (i.e., when the channel is unmasked, the guest must manually handle
+ * pending notifications as no upcall will be scheduled by Xen).
+ *
+ * To expedite scanning of pending notifications, any 0->1 pending
+ * transition on an unmasked channel causes a corresponding bit in a
+ * per-vcpu selector word to be set. Each bit in the selector covers a
+ * 'C long' in the PENDING bitfield array.
+ */
+ xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8];
+ xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];
+
+ /*
+ * Wallclock time: updated only by control software. Guests should base
+ * their gettimeofday() syscall on this wallclock-base value.
+ */
+ uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */
+ uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
+ uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */
+
+ struct arch_shared_info arch;
+
+};
+#ifndef __XEN__
+typedef struct shared_info shared_info_t;
+#endif
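+
+/*
+ * Illustrative sketch (editor's addition, not part of the Xen header):
+ * the two-level pending scan described above. 'sel' is this VCPU's
+ * evtchn_pending_sel (the real handler would swap it to zero
+ * atomically); each set bit selects one word of evtchn_pending.
+ * handle_port() stands in for a hypothetical dispatch routine.
+ */
+#if 0
+extern void handle_port(int port);  /* hypothetical dispatcher */
+
+static void
+scan_event_ports(struct shared_info *s, xen_ulong_t sel)
+{
+    int i, j;
+    xen_ulong_t pend;
+
+    for (i = 0; sel != 0; i++, sel >>= 1) {
+        if (!(sel & 1))
+            continue;
+        pend = s->evtchn_pending[i] & ~s->evtchn_mask[i];
+        for (j = 0; pend != 0; j++, pend >>= 1)
+            if (pend & 1)
+                handle_port(i * 8 * sizeof(xen_ulong_t) + j);
+    }
+}
+#endif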
+
+/*
+ * `incontents 200 startofday Start-of-day memory layout
+ *
+ * 1. The domain is started within contiguous virtual-memory region.
+ * 2. The contiguous region ends on an aligned 4MB boundary.
+ * 3. This is the order of bootstrap elements in the initial virtual region:
+ * a. relocated kernel image
+ * b. initial ram disk [mod_start, mod_len]
+ * c. list of allocated page frames [mfn_list, nr_pages]
+ * (unless relocated due to XEN_ELFNOTE_INIT_P2M)
+ * d. start_info_t structure [register ESI (x86)]
+ * e. bootstrap page tables [pt_base and CR3 (x86)]
+ * f. bootstrap stack [register ESP (x86)]
+ * 4. Bootstrap elements are packed together, but each is 4kB-aligned.
+ * 5. The initial ram disk may be omitted.
+ * 6. The list of page frames forms a contiguous 'pseudo-physical' memory
+ * layout for the domain. In particular, the bootstrap virtual-memory
+ * region is a 1:1 mapping to the first section of the pseudo-physical map.
+ * 7. All bootstrap elements are mapped read-writable for the guest OS. The
+ * only exception is the bootstrap page table, which is mapped read-only.
+ * 8. There is guaranteed to be at least 512kB padding after the final
+ * bootstrap element. If necessary, the bootstrap virtual region is
+ * extended by an extra 4MB to ensure this.
+ *
+ * Note: Prior to 25833:bb85bbccb1c9 ("x86/32-on-64 adjust Dom0 initial page
+ * table layout") a bug caused the pt_base (3.e above) and cr3 to not point
+ * to the start of the guest page tables (they were offset by two pages).
+ * This only manifested itself on 32-on-64 dom0 kernels and not 32-on-64 domU
+ * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 got
+ * allocated in the order 'first L1', 'first L2', 'first L3', so the offset
+ * to the page table base is two pages back. An initial domain that is
+ * 32-bit and runs under a 64-bit hypervisor should _NOT_ use the two
+ * pages preceding pt_base; it should mark them as reserved/unused.
+ */
+
+#define MAX_GUEST_CMDLINE 1024
+struct start_info {
+ /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */
+ char magic[32]; /* "xen-<version>-<platform>". */
+ unsigned long nr_pages; /* Total pages allocated to this domain. */
+ unsigned long shared_info; /* MACHINE address of shared info struct. */
+ uint32_t flags; /* SIF_xxx flags. */
+ xen_pfn_t store_mfn; /* MACHINE page number of shared page. */
+ uint32_t store_evtchn; /* Event channel for store communication. */
+ union {
+ struct {
+ xen_pfn_t mfn; /* MACHINE page number of console page. */
+ uint32_t evtchn; /* Event channel for console page. */
+ } domU;
+ struct {
+ uint32_t info_off; /* Offset of console_info struct. */
+ uint32_t info_size; /* Size of console_info struct from start.*/
+ } dom0;
+ } console;
+ /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */
+ unsigned long pt_base; /* VIRTUAL address of page directory. */
+ unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */
+ unsigned long mfn_list; /* VIRTUAL address of page-frame list. */
+ unsigned long mod_start; /* VIRTUAL address of pre-loaded module */
+ /* (PFN of pre-loaded module if */
+ /* SIF_MOD_START_PFN set in flags). */
+ unsigned long mod_len; /* Size (bytes) of pre-loaded module. */
+ int8_t cmd_line[MAX_GUEST_CMDLINE];
+ /* The pfn range here covers both page table and p->m table frames. */
+ unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */
+ unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */
+};
+typedef struct start_info start_info_t;
+
+/* New console union for dom0 introduced in 0x00030203. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030203
+#define console_mfn console.domU.mfn
+#define console_evtchn console.domU.evtchn
+#endif
+
+/* These flags are passed in the 'flags' field of start_info_t. */
+#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
+#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
+#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */
+#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */
+#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
+
+/*
+ * A multiboot module is a package containing modules very similar to a
+ * multiboot module array. The only differences are:
+ * - the array of module descriptors is by convention simply at the beginning
+ * of the multiboot module,
+ * - addresses in the module descriptors are based on the beginning of the
+ * multiboot module,
+ * - the number of modules is determined by a termination descriptor that has
+ * mod_start == 0.
+ *
+ * This permits both building it statically and referencing it in a configuration
+ * file, and lets the PV guest easily rebase the addresses to virtual addresses
+ * and at the same time count the number of modules.
+ */
+struct xen_multiboot_mod_list
+{
+ /* Address of first byte of the module */
+ uint32_t mod_start;
+ /* Address of last byte of the module (inclusive) */
+ uint32_t mod_end;
+ /* Address of zero-terminated command line */
+ uint32_t cmdline;
+ /* Unused, must be zero */
+ uint32_t pad;
+};
+/*
+ * `incontents 200 startofday_dom0_console Dom0_console
+ *
+ * The console structure in start_info.console.dom0
+ *
+ * This structure includes a variety of information required to
+ * have a working VGA/VESA console.
+ */
+typedef struct dom0_vga_console_info {
+ uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */
+#define XEN_VGATYPE_TEXT_MODE_3 0x03
+#define XEN_VGATYPE_VESA_LFB 0x23
+#define XEN_VGATYPE_EFI_LFB 0x70
+
+ union {
+ struct {
+ /* Font height, in pixels. */
+ uint16_t font_height;
+ /* Cursor location (column, row). */
+ uint16_t cursor_x, cursor_y;
+ /* Number of rows and columns (dimensions in characters). */
+ uint16_t rows, columns;
+ } text_mode_3;
+
+ struct {
+ /* Width and height, in pixels. */
+ uint16_t width, height;
+ /* Bytes per scan line. */
+ uint16_t bytes_per_line;
+ /* Bits per pixel. */
+ uint16_t bits_per_pixel;
+ /* LFB physical address, and size (in units of 64kB). */
+ uint32_t lfb_base;
+ uint32_t lfb_size;
+ /* RGB mask offsets and sizes, as defined by VBE 1.2+ */
+ uint8_t red_pos, red_size;
+ uint8_t green_pos, green_size;
+ uint8_t blue_pos, blue_size;
+ uint8_t rsvd_pos, rsvd_size;
+#if __XEN_INTERFACE_VERSION__ >= 0x00030206
+ /* VESA capabilities (offset 0xa, VESA command 0x4f00). */
+ uint32_t gbl_caps;
+ /* Mode attributes (offset 0x0, VESA command 0x4f01). */
+ uint16_t mode_attrs;
+#endif
+ } vesa_lfb;
+ } u;
+} dom0_vga_console_info_t;
+#define xen_vga_console_info dom0_vga_console_info
+#define xen_vga_console_info_t dom0_vga_console_info_t
+
+typedef uint8_t xen_domain_handle_t[16];
+
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+
+__DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t);
+__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t);
+__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t);
+__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t);
+
+#else /* __ASSEMBLY__ */
+
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+
+#endif /* !__ASSEMBLY__ */
+
+/* Default definitions for macros used by domctl/sysctl. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+#ifndef uint64_aligned_t
+#define uint64_aligned_t uint64_t
+#endif
+#ifndef XEN_GUEST_HANDLE_64
+#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name)
+#endif
+
+#ifndef __ASSEMBLY__
+struct xenctl_bitmap {
+ XEN_GUEST_HANDLE_64(uint8) bitmap;
+ uint32_t nr_bits;
+};
+#endif
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#endif /* __XEN_PUBLIC_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/xencomm.h
@@ -1,0 +1,41 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) IBM Corp. 2006
+ */
+
+#ifndef _XEN_XENCOMM_H_
+#define _XEN_XENCOMM_H_
+
+/* A xencomm descriptor is a scatter/gather list containing physical
+ * addresses corresponding to a virtually contiguous memory area. The
+ * hypervisor translates these physical addresses to machine addresses to copy
+ * to and from the virtually contiguous area.
+ */
+
+#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */
+#define XENCOMM_INVALID (~0UL)
+
+struct xencomm_desc {
+ uint32_t magic;
+ uint32_t nr_addrs; /* the number of entries in address[] */
+ uint64_t address[0];
+};
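+
+/*
+ * Layout sketch: for a virtually contiguous two-page buffer, a caller
+ * builds a descriptor with magic = XENCOMM_MAGIC, nr_addrs = 2, and
+ * address[0..1] holding the physical address of each page; an entry
+ * may be set to XENCOMM_INVALID to mark it as unused.
+ */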
+
+#endif /* _XEN_XENCOMM_H_ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/xenoprof.h
@@ -1,0 +1,152 @@
+/******************************************************************************
+ * xenoprof.h
+ *
+ * Interface for enabling system wide profiling based on hardware performance
+ * counters
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Written by Aravind Menon & Jose Renato Santos
+ */
+
+#ifndef __XEN_PUBLIC_XENOPROF_H__
+#define __XEN_PUBLIC_XENOPROF_H__
+
+#include "xen.h"
+
+/*
+ * Commands to HYPERVISOR_xenoprof_op().
+ */
+#define XENOPROF_init 0
+#define XENOPROF_reset_active_list 1
+#define XENOPROF_reset_passive_list 2
+#define XENOPROF_set_active 3
+#define XENOPROF_set_passive 4
+#define XENOPROF_reserve_counters 5
+#define XENOPROF_counter 6
+#define XENOPROF_setup_events 7
+#define XENOPROF_enable_virq 8
+#define XENOPROF_start 9
+#define XENOPROF_stop 10
+#define XENOPROF_disable_virq 11
+#define XENOPROF_release_counters 12
+#define XENOPROF_shutdown 13
+#define XENOPROF_get_buffer 14
+#define XENOPROF_set_backtrace 15
+
+/* AMD IBS support */
+#define XENOPROF_get_ibs_caps 16
+#define XENOPROF_ibs_counter 17
+#define XENOPROF_last_op 17
+
+#define MAX_OPROF_EVENTS 32
+#define MAX_OPROF_DOMAINS 25
+#define XENOPROF_CPU_TYPE_SIZE 64
+
+/* Xenoprof performance events (not Xen events) */
+struct event_log {
+ uint64_t eip;
+ uint8_t mode;
+ uint8_t event;
+};
+
+/* PC value that indicates a special code */
+#define XENOPROF_ESCAPE_CODE (~0ULL)
+/* Transient events for the xenoprof->oprofile cpu buf */
+#define XENOPROF_TRACE_BEGIN 1
+
+/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */
+struct xenoprof_buf {
+ uint32_t event_head;
+ uint32_t event_tail;
+ uint32_t event_size;
+ uint32_t vcpu_id;
+ uint64_t xen_samples;
+ uint64_t kernel_samples;
+ uint64_t user_samples;
+ uint64_t lost_samples;
+ struct event_log event_log[1];
+};
+#ifndef __XEN__
+typedef struct xenoprof_buf xenoprof_buf_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t);
+#endif
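+
+/*
+ * event_head and event_tail index event_log[] as a ring of event_size
+ * entries: Xen produces at event_head, the domain consumes at
+ * event_tail. Consumer sketch (handle() is a hypothetical callback):
+ *
+ *	while(buf->event_tail != buf->event_head){
+ *		handle(&buf->event_log[buf->event_tail]);
+ *		buf->event_tail = (buf->event_tail+1) % buf->event_size;
+ *	}
+ */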
+
+struct xenoprof_init {
+ int32_t num_events;
+ int32_t is_primary;
+ char cpu_type[XENOPROF_CPU_TYPE_SIZE];
+};
+typedef struct xenoprof_init xenoprof_init_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t);
+
+struct xenoprof_get_buffer {
+ int32_t max_samples;
+ int32_t nbuf;
+ int32_t bufsize;
+ uint64_t buf_gmaddr;
+};
+typedef struct xenoprof_get_buffer xenoprof_get_buffer_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t);
+
+struct xenoprof_counter {
+ uint32_t ind;
+ uint64_t count;
+ uint32_t enabled;
+ uint32_t event;
+ uint32_t hypervisor;
+ uint32_t kernel;
+ uint32_t user;
+ uint64_t unit_mask;
+};
+typedef struct xenoprof_counter xenoprof_counter_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t);
+
+typedef struct xenoprof_passive {
+ uint16_t domain_id;
+ int32_t max_samples;
+ int32_t nbuf;
+ int32_t bufsize;
+ uint64_t buf_gmaddr;
+} xenoprof_passive_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t);
+
+struct xenoprof_ibs_counter {
+ uint64_t op_enabled;
+ uint64_t fetch_enabled;
+ uint64_t max_cnt_fetch;
+ uint64_t max_cnt_op;
+ uint64_t rand_en;
+ uint64_t dispatched_ops;
+};
+typedef struct xenoprof_ibs_counter xenoprof_ibs_counter_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_ibs_counter_t);
+
+#endif /* __XEN_PUBLIC_XENOPROF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/xsm/flask_op.h
@@ -1,0 +1,201 @@
+/*
+ * This file contains the flask_op hypercall commands and definitions.
+ *
+ * Author: George Coker, <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __FLASK_OP_H__
+#define __FLASK_OP_H__
+
+#define XEN_FLASK_INTERFACE_VERSION 1
+
+struct xen_flask_load {
+ XEN_GUEST_HANDLE(char) buffer;
+ uint32_t size;
+};
+
+struct xen_flask_setenforce {
+ uint32_t enforcing;
+};
+
+struct xen_flask_sid_context {
+ /* IN/OUT: sid to convert to/from string */
+ uint32_t sid;
+ /* IN: size of the context buffer
+ * OUT: actual size of the output context string
+ */
+ uint32_t size;
+ XEN_GUEST_HANDLE(char) context;
+};
+
+struct xen_flask_access {
+ /* IN: access request */
+ uint32_t ssid;
+ uint32_t tsid;
+ uint32_t tclass;
+ uint32_t req;
+ /* OUT: AVC data */
+ uint32_t allowed;
+ uint32_t audit_allow;
+ uint32_t audit_deny;
+ uint32_t seqno;
+};
+
+struct xen_flask_transition {
+ /* IN: transition SIDs and class */
+ uint32_t ssid;
+ uint32_t tsid;
+ uint32_t tclass;
+ /* OUT: new SID */
+ uint32_t newsid;
+};
+
+struct xen_flask_userlist {
+ /* IN: starting SID for list */
+ uint32_t start_sid;
+ /* IN: size of user string and output buffer
+ * OUT: number of SIDs returned */
+ uint32_t size;
+ union {
+ /* IN: user to enumerate SIDs */
+ XEN_GUEST_HANDLE(char) user;
+ /* OUT: SID list */
+ XEN_GUEST_HANDLE(uint32) sids;
+ } u;
+};
+
+struct xen_flask_boolean {
+ /* IN/OUT: numeric identifier for boolean [GET/SET]
+ * If -1, name will be used and bool_id will be filled in. */
+ uint32_t bool_id;
+ /* OUT: current enforcing value of boolean [GET/SET] */
+ uint8_t enforcing;
+ /* OUT: pending value of boolean [GET/SET] */
+ uint8_t pending;
+ /* IN: new value of boolean [SET] */
+ uint8_t new_value;
+ /* IN: commit new value instead of only setting pending [SET] */
+ uint8_t commit;
+ /* IN: size of boolean name buffer [GET/SET]
+ * OUT: actual size of name [GET only] */
+ uint32_t size;
+ /* IN: if bool_id is -1, used to find boolean [GET/SET]
+ * OUT: textual name of boolean [GET only]
+ */
+ XEN_GUEST_HANDLE(char) name;
+};
+
+struct xen_flask_setavc_threshold {
+ /* IN */
+ uint32_t threshold;
+};
+
+struct xen_flask_hash_stats {
+ /* OUT */
+ uint32_t entries;
+ uint32_t buckets_used;
+ uint32_t buckets_total;
+ uint32_t max_chain_len;
+};
+
+struct xen_flask_cache_stats {
+ /* IN */
+ uint32_t cpu;
+ /* OUT */
+ uint32_t lookups;
+ uint32_t hits;
+ uint32_t misses;
+ uint32_t allocations;
+ uint32_t reclaims;
+ uint32_t frees;
+};
+
+struct xen_flask_ocontext {
+ /* IN */
+ uint32_t ocon;
+ uint32_t sid;
+ uint64_t low, high;
+};
+
+struct xen_flask_peersid {
+ /* IN */
+ evtchn_port_t evtchn;
+ /* OUT */
+ uint32_t sid;
+};
+
+struct xen_flask_relabel {
+ /* IN */
+ uint32_t domid;
+ uint32_t sid;
+};
+
+struct xen_flask_op {
+ uint32_t cmd;
+#define FLASK_LOAD 1
+#define FLASK_GETENFORCE 2
+#define FLASK_SETENFORCE 3
+#define FLASK_CONTEXT_TO_SID 4
+#define FLASK_SID_TO_CONTEXT 5
+#define FLASK_ACCESS 6
+#define FLASK_CREATE 7
+#define FLASK_RELABEL 8
+#define FLASK_USER 9
+#define FLASK_POLICYVERS 10
+#define FLASK_GETBOOL 11
+#define FLASK_SETBOOL 12
+#define FLASK_COMMITBOOLS 13
+#define FLASK_MLS 14
+#define FLASK_DISABLE 15
+#define FLASK_GETAVC_THRESHOLD 16
+#define FLASK_SETAVC_THRESHOLD 17
+#define FLASK_AVC_HASHSTATS 18
+#define FLASK_AVC_CACHESTATS 19
+#define FLASK_MEMBER 20
+#define FLASK_ADD_OCONTEXT 21
+#define FLASK_DEL_OCONTEXT 22
+#define FLASK_GET_PEER_SID 23
+#define FLASK_RELABEL_DOMAIN 24
+ uint32_t interface_version; /* XEN_FLASK_INTERFACE_VERSION */
+ union {
+ struct xen_flask_load load;
+ struct xen_flask_setenforce enforce;
+ /* FLASK_CONTEXT_TO_SID and FLASK_SID_TO_CONTEXT */
+ struct xen_flask_sid_context sid_context;
+ struct xen_flask_access access;
+ /* FLASK_CREATE, FLASK_RELABEL, FLASK_MEMBER */
+ struct xen_flask_transition transition;
+ struct xen_flask_userlist userlist;
+ /* FLASK_GETBOOL, FLASK_SETBOOL */
+ struct xen_flask_boolean boolean;
+ struct xen_flask_setavc_threshold setavc_threshold;
+ struct xen_flask_hash_stats hash_stats;
+ struct xen_flask_cache_stats cache_stats;
+ /* FLASK_ADD_OCONTEXT, FLASK_DEL_OCONTEXT */
+ struct xen_flask_ocontext ocontext;
+ struct xen_flask_peersid peersid;
+ struct xen_flask_relabel relabel;
+ } u;
+};
+typedef struct xen_flask_op xen_flask_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_flask_op_t);
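+
+/*
+ * Usage sketch: fill in cmd (e.g. FLASK_GETENFORCE), set
+ * interface_version to XEN_FLASK_INTERFACE_VERSION, and fill the
+ * matching member of u before issuing the flask_op hypercall (this
+ * kernel provides no wrapper for it).
+ */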
+
+#endif
--- /dev/null
+++ b/sys/src/9/xen/xen.s
@@ -1,0 +1,72 @@
+#include "xendefs.h"
+#include "mem.h"
+
+#define ENTRY(X) TEXT X(SB), $0
+
+/*
+ * XXX there's a race in here because we can get an upcall
+ * between the spllo() (in xenupcall) and the rti. This will make
+ * handlers stack, which could lead to a blown stack. Probably
+ * not worth fixing (but possibly worth detecting and panicking).
+ *
+ * For fun get some popcorn and turn off the lights and read the
+ * linux solution (search for scrit/ecrit).
+ */
+ENTRY(hypervisor_callback)
+ SUBL $8, SP /* space for ecode and trap type */
+ PUSHL DS /* save DS */
+ PUSHL $(KDSEL)
+ POPL DS /* fix up DS */
+ PUSHL ES /* save ES */
+ PUSHL $(KDSEL)
+ POPL ES /* fix up ES */
+
+ PUSHL FS /* save the rest of the Ureg struct */
+ PUSHL GS
+ PUSHAL
+
+ PUSHL SP /* Ureg* argument to trap */
+ CALL xenupcall+0(SB)
+ POPL AX
+
+ POPAL
+ POPL GS
+ POPL FS
+ POPL ES
+ POPL DS
+ ADDL $8, SP /* pop error code and trap type */
+ IRETL
+
+/* Hypervisor uses this for application faults while it executes.*/
+ENTRY(failsafe_callback)
+ IRETL
+ PUSHL AX
+ CALL install_safe_pf_handler(SB)
+ MOVL 32(SP), BX
+ MOVW BX, DS
+ MOVL 36(SP), BX
+ MOVW BX, ES
+ MOVL 40(SP), BX
+ MOVW BX, FS
+ MOVL 44(SP), BX
+ MOVW BX, GS
+ CALL install_normal_pf_handler(SB)
+ POPL AX
+ ADDL $16, SP
+ IRETL
+
+/* xen traps with varying argument counts */
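+/*
+ * The hypercall number goes in AX and up to five arguments in BX, CX,
+ * DX, SI, DI; each entry point loads one register and falls through to
+ * the next, so xencallN first loads argument N-1, and xencall1 finally
+ * issues INT $0x82.
+ */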
+TEXT xencall6(SB), $0
+ MOVL VDI+20(FP), DI
+TEXT xencall5(SB), $0
+ MOVL VSI+16(FP), SI
+TEXT xencall4(SB), $0
+ MOVL VDX+12(FP), DX
+TEXT xencall3(SB), $0
+ MOVL VCX+8(FP), CX
+TEXT xencall2(SB), $0
+ MOVL VBX+4(FP), BX
+TEXT xencall1(SB), $0
+ MOVL op+0(FP), AX
+ INT $0x82
+ RET
--- /dev/null
+++ b/sys/src/9/xen/xenbin.c
@@ -1,0 +1,162 @@
+/*
+ * Transform a Plan 9 386 bootable image to make it compatible with
+ * the Xen binary image loader:
+ *
+ * - pad the beginning of the text with zeroes so that the image can be loaded at
+ * guest 'physical' address 0
+ * - insert a Xen header
+ * - pad the end of the text so that data segment is page-aligned in the file
+ * - adjust the linenumber-pc table so Plan 9 debuggers won't be confused
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <mach.h>
+
+#define PAGE 4096
+#define PLAN9HDR 32
+#define XENHDR 32
+#define KZERO 0x80000000
+#define FLAG_VALID (1<<16)
+#define FLAG_PAE (1<<14)
+
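+/* write n in big-endian byte order (Plan 9 executable header fields) */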
+void
+lput(long n)
+{
+ char buf[sizeof(long)];
+ int i;
+
+ for (i = sizeof(long)-1; i >= 0; i--) {
+ buf[i] = n;
+ n >>= 8;
+ }
+ write(1, buf, sizeof(long));
+}
+
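+/* write n in little-endian byte order (x86 order, for the Xen header) */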
+void
+rput(long n)
+{
+ char buf[sizeof(long)];
+ int i;
+
+ for (i = 0; i < sizeof(long); i++) {
+ buf[i] = n;
+ n >>= 8;
+ }
+ write(1, buf, sizeof(long));
+}
+
+void
+copy(long n)
+{
+ char buf[PAGE];
+ int m;
+
+ while (n > 0) {
+ m = sizeof buf;
+ if (m > n)
+ m = n;
+ read(0, buf, m);
+ write(1, buf, m);
+ n -= m;
+ }
+}
+
+void
+pad(int n)
+{
+ char buf[PAGE];
+ int m;
+
+ memset(buf, 0, sizeof buf);
+ while (n > 0) {
+ m = sizeof buf;
+ if (m > n)
+ m = n;
+ write(1, buf, m);
+ n -= m;
+ }
+}
+
+/*
+ * See /sys/src/cmd/8l/span.c:/^asmlc
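+ *
+ * A byte 128+s in the line-number table advances the pc by s (1..127)
+ * bytes without changing the line number, so padding the text by prepad
+ * bytes takes ceil(prepad/127) such bytes (newlnpcsz in main accounts
+ * for this).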
+ */
+void
+adjustlnpc(int v)
+{
+ char buf[PAGE];
+ int n, s;
+
+ n = 0;
+ while (v) {
+ s = 127;
+ if (v < 127)
+ s = v;
+ buf[n++] = s+128;
+ if (n == sizeof buf) {
+ write(1, buf, n);
+ n = 0;
+ }
+ v -= s;
+ }
+ if (n > 0)
+ write(1, buf, n);
+}
+
+void
+main(int argc, char **argv)
+{
+ Fhdr fhdr;
+ long newtxtsz;
+ long newentry;
+ long newlnpcsz;
+ long prepad, postpad;
+ long flags;
+
+ flags = FLAG_VALID;
+ if (argc > 1 && strcmp(argv[1], "-p") == 0)
+ flags |= FLAG_PAE;
+
+ crackhdr(0, &fhdr);
+
+ newtxtsz = ((fhdr.txtsz+PLAN9HDR+PAGE-1)&~(PAGE-1)) - PLAN9HDR;
+ newentry = KZERO+PLAN9HDR;
+ prepad = fhdr.entry - newentry;
+ postpad = newtxtsz - fhdr.txtsz;
+ newtxtsz += prepad;
+ newlnpcsz = fhdr.lnpcsz;
+ if (newlnpcsz)
+ newlnpcsz += (prepad+126)/127;
+
+ /* plan 9 header */
+ lput(4*11*11+7); /* magic */
+ lput(newtxtsz); /* sizes */
+ lput(fhdr.datsz);
+ lput(fhdr.bsssz);
+ lput(fhdr.symsz); /* nsyms */
+ lput(newentry); /* va of entry */
+ lput(fhdr.sppcsz); /* sp offsets */
+ lput(newlnpcsz); /* line offsets */
+
+ /* xen header */
+ rput(0x336EC578); /* magic */
+ rput(flags); /* flags */
+	rput(-(0x336EC578+flags));	/* checksum: magic+flags+checksum must sum to zero */
+ rput(newentry); /* header_addr */
+ rput(KZERO); /* load_addr */
+ rput(KZERO+newtxtsz+fhdr.datsz); /* load_end_addr */
+ rput(KZERO+newtxtsz+fhdr.datsz+fhdr.bsssz); /* bss_end_addr */
+ rput(fhdr.entry); /* entry_addr */
+
+ pad(prepad-XENHDR);
+
+ seek(0, fhdr.txtoff, 0);
+ copy(fhdr.txtsz);
+ pad(postpad);
+ copy(fhdr.datsz);
+ copy(fhdr.symsz);
+ if (newlnpcsz) {
+ adjustlnpc(prepad);
+ copy(fhdr.lnpcsz);
+ }
+ exits(0);
+}
--- /dev/null
+++ b/sys/src/9/xen/xenelf.c
@@ -1,0 +1,157 @@
+#include <u.h>
+#include <libc.h>
+#include "/sys/src/libmach/elf.h"
+
+enum {
+ Page = 4096,
+};
+
+#define ROUND(n) ((n+Page-1)&~(Page-1))
+
+Shdr isect, csect;
+
+static ushort
+GETS(void *a)
+{
+ uchar *p = a;
+ return p[0] | p[1]<<8;
+}
+
+static ulong
+GETL(void *a)
+{
+ uchar *p = a;
+ return p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24;
+}
+
+static void
+PUTS(void *a, ushort v)
+{
+ uchar *p = a;
+ p[0] = v;
+ p[1] = v>>8;
+}
+
+static void
+PUTL(void *a, ulong v)
+{
+ uchar *p = a;
+ p[0] = v;
+ p[1] = v>>8;
+ p[2] = v>>16;
+ p[3] = v>>24;
+}
+
+void
+copy(int fin, int fout, ulong src, ulong dst, ulong size)
+{
+ char buf[Page];
+ int n;
+
+ seek(fin, src, 0);
+ seek(fout, dst, 0);
+ n = Page;
+ while (size > 0) {
+ if (n > size)
+ n = size;
+ read(fin, buf, n);
+ write(fout, buf, n);
+ size -= n;
+ }
+}
+
+void
+main(int argc, char **argv)
+{
+ Ehdr e;
+ Shdr s;
+ Phdr p;
+ int efd, ofd, ns, i, n;
+ ulong shoff, off, noff, size, msize;
+ char *sname, *sval;
+
+ if (argc != 5)
+ sysfatal("Usage: xenelf input-elf-file output-elf-file section-name section-contents");
+ efd = open(argv[1], OREAD);
+ if (efd < 0)
+ sysfatal("%s: %r", argv[1]);
+ ofd = create(argv[2], OWRITE, 0666);
+ if (ofd < 0)
+ sysfatal("%s: %r", argv[2]);
+ sname = argv[3];
+ sval = argv[4];
+
+ read(efd, &e, sizeof e);
+ //if (e.shstrndx)
+ // sysfatal("section header string index already present");
+
+ /* page-align loadable segments in file */
+ ns = GETS(&e.phnum);
+ shoff = GETL(&e.phoff);
+ noff = shoff+ns*sizeof(Phdr);
+ noff = ROUND(noff);
+ for (i = 0; i < ns; i++) {
+ seek(efd, shoff+i*sizeof(Phdr), 0);
+ read(efd, &p, sizeof p);
+ off = GETL(&p.offset);
+ PUTL(&p.offset, noff);
+ size = GETL(&p.filesz);
+ copy(efd, ofd, off, noff, size);
+ if (GETL(&p.type) == LOAD) {
+ size = ROUND(size);
+ PUTL(&p.filesz, size);
+ if ((msize = GETL(&p.memsz)) != 0 && size > msize)
+ PUTL(&p.memsz, size);
+ } else {
+ /* memory size for symtab segment is actually line number table size */
+ msize = GETL(&p.memsz);
+ copy(efd, ofd, off+size, noff+size, msize);
+ noff += msize;
+ }
+ noff += size;
+ seek(ofd, shoff+i*sizeof(Phdr), 0);
+ write(ofd, &p, sizeof p);
+ }
+
+ /* append single-entry shstrndx */
+ PUTL(&isect.offset, seek(ofd, noff, 0));
+ n = strlen(sname);
+ PUTL(&isect.size, n+2);
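+	/* the table is "\0<name>\0": a leading nul (empty name), then sname with its nul */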
+ write(ofd, sname+n, 1);
+ write(ofd, sname, n+1);
+
+ /* append comment section contents */
+ PUTL(&csect.name, 1);
+ PUTL(&csect.offset, seek(ofd, 0, 2));
+ n = strlen(sval);
+ PUTL(&csect.size, n+1);
+ write(ofd, sval, n+1);
+
+ /* copy existing section headers to end */
+ ns = 0; //GETS(&e.shnum);
+ shoff = GETL(&e.shoff);
+ PUTL(&e.shoff, seek(ofd, 0, 2));
+ for (i = 0; i < ns; i++) {
+ seek(efd, shoff+i*sizeof(Shdr), 0);
+ read(efd, &s, sizeof s);
+ seek(ofd, 0, 2);
+ write(ofd, &s, sizeof s);
+ }
+
+ /* append section header for comment section */
+ write(ofd, &csect, sizeof csect);
+ ++ns;
+
+ /* append section header for shstrndx */
+ PUTS(&e.shstrndx, ns);
+ ++ns;
+ write(ofd, &isect, sizeof isect);
+
+ /* rewrite elf header */
+ PUTS(&e.shentsize, sizeof(Shdr));
+ PUTS(&e.shnum, ns);
+ seek(ofd, 0, 0);
+ write(ofd, &e, sizeof e);
+
+ exits(0);
+}
--- /dev/null
+++ b/sys/src/9/xen/xengrant.c
@@ -1,0 +1,100 @@
+/*
+ * Sharing page frames with other domains
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+enum {
+	Nframes = 1,	// XXX don't increase this without setting up extra mappings in xengrantinit()
+};
+
+static struct {
+ Lock;
+ ushort free;
+ ushort *refs;
+} refalloc;
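+
+/*
+ * refs[] threads a LIFO free list through the table: refalloc.free is
+ * the head, and each entry holds the next free ref. Ref 0 is never
+ * handed out (allocref fails once the head reaches 0), so it serves as
+ * the end-of-list marker.
+ */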
+
+static grant_entry_t *granttab;
+
+void
+xengrantinit(void)
+{
+ gnttab_setup_table_t setup;
+ ulong frames[Nframes];
+ int nrefs, i;
+
+ setup.dom = DOMID_SELF;
+ setup.nr_frames = Nframes;
+ set_xen_guest_handle(setup.frame_list, frames);
+ if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0 || setup.status != 0)
+ panic("xen grant table setup");
+ granttab = (grant_entry_t*)mmumapframe(XENGRANTTAB, frames[0]);
+ nrefs = Nframes * BY2PG / sizeof(grant_entry_t);
+ refalloc.refs = (ushort*)malloc(nrefs*sizeof(ushort));
+ for (i = 0; i < nrefs; i++)
+ refalloc.refs[i] = i-1;
+ refalloc.free = nrefs-1;
+}
+
+static int
+allocref(void)
+{
+ int ref;
+
+ ilock(&refalloc);
+ ref = refalloc.free;
+ if (ref > 0)
+ refalloc.free = refalloc.refs[ref];
+ iunlock(&refalloc);
+ return ref;
+}
+
+static void
+freeref(int ref)
+{
+ ilock(&refalloc);
+ refalloc.refs[ref] = refalloc.free;
+ refalloc.free = ref;
+ iunlock(&refalloc);
+}
+
+int
+xengrant(domid_t domid, ulong frame, int flags)
+{
+ int ref;
+ grant_entry_t *gt;
+
+	if ((ref = allocref()) <= 0)
+ panic("out of xengrant refs");
+ gt = &granttab[ref];
+ gt->frame = frame;
+ gt->domid = domid;
+ coherence();
+ gt->flags = flags;
+ return ref;
+}
+
+int
+xengrantend(int ref)
+{
+ grant_entry_t *gt;
+ int frame;
+
+ gt = &granttab[ref];
+ coherence();
+	if (gt->flags & GTF_accept_transfer) {
+		if ((gt->flags & GTF_transfer_completed) == 0)
+ panic("xengrantend transfer in progress");
+ } else {
+ if (gt->flags&(GTF_reading|GTF_writing))
+ panic("xengrantend frame in use");
+ }
+ coherence();
+ frame = gt->frame;
+ gt->flags = GTF_invalid;
+ freeref(ref);
+ return frame;
+}
--- /dev/null
+++ b/sys/src/9/xen/xenpcf
@@ -1,0 +1,62 @@
+dev
+ root netif
+ cons
+ uart
+ arch
+ env
+ pipe
+ proc
+ mnt
+ srv
+ shr
+ dup
+ rtc
+ ssl
+ tls
+ cap
+ kprof
+ fs
+
+ xenstore
+ ether netif
+ ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum386 inferno
+
+ sd
+
+link
+ etherxen
+ ethermedium
+ netdevmedium
+ loopbackmedium
+
+misc
+ archxen
+ sdxen
+ uartxen
+
+ip
+ tcp
+ udp
+ rudp
+ ipifc
+ icmp
+ icmp6
+ gre
+ ipmux
+ esp
+ il
+
+port
+ int cpuserver = 0;
+
+boot boot
+ tcp
+ local
+
+bootdir
+ boot$CONF.out boot
+ /$objtype/bin/paqfs
+ /$objtype/bin/auth/factotum
+ bootfs.paq
+ xenstore
+
--- /dev/null
+++ b/sys/src/9/xen/xenstore.c
@@ -1,0 +1,130 @@
+#include <u.h>
+#include <libc.h>
+
+typedef ulong uint32_t;
+
+enum xsd_sockmsg_type
+{
+ XS_DEBUG,
+ XS_DIRECTORY,
+ XS_READ,
+ XS_GET_PERMS,
+ XS_WATCH,
+ XS_UNWATCH,
+ XS_TRANSACTION_START,
+ XS_TRANSACTION_END,
+ XS_INTRODUCE,
+ XS_RELEASE,
+ XS_GET_DOMAIN_PATH,
+ XS_WRITE,
+ XS_MKDIR,
+ XS_RM,
+ XS_SET_PERMS,
+ XS_WATCH_EVENT,
+ XS_ERROR,
+ XS_IS_DOMAIN_INTRODUCED
+};
+
+struct xsd_sockmsg
+{
+ uint32_t type; /* XS_??? */
+ uint32_t req_id;/* Request identifier, echoed in daemon's response. */
+ uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */
+ uint32_t len; /* Length of data following this. */
+
+ /* Generally followed by nul-terminated string(s). */
+};
+
+char*
+xscmd(int fd, enum xsd_sockmsg_type cmd, char *s, char *val)
+{
+ static char buf[512];
+ struct xsd_sockmsg *msg;
+ char *arg;
+ static ulong reqid = 1;
+ int n;
+
+ msg = (struct xsd_sockmsg*)buf;
+ arg = buf + sizeof(*msg);
+ if(cmd != XS_WATCH_EVENT){
+ msg->type = cmd;
+ msg->req_id = reqid++;
+ msg->tx_id = 0;
+ msg->len = strlen(s)+1;
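+		/*
+		 * XS_WATCH sends two nul-terminated strings (path, token),
+		 * so count val's trailing nul too; for other commands the
+		 * value is appended without a terminating nul.
+		 */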
+ if (val != 0) {
+ msg->len += strlen(val);
+ if (msg->type == XS_WATCH)
+ msg->len++;
+ }
+ strcpy(arg, s);
+ if (val != 0)
+ strcpy(arg+strlen(s)+1, val);
+ if (write(fd, buf, sizeof(*msg)+msg->len) < 0)
+ sysfatal("write: %r");
+ }
+ if ((n = read(fd, buf, sizeof(*msg))) != sizeof(*msg))
+ sysfatal("read hdr %d: %r", n);
+ fprint(2, "type %lud req_id %lud len %lud\n", msg->type, msg->req_id, msg->len);
+ if ((n = read(fd, arg, msg->len)) != msg->len)
+ sysfatal("read data %d: %r", n);
+ if (cmd == XS_DIRECTORY || cmd == XS_WATCH_EVENT) {
+ for (s = arg; s < arg+msg->len; s++) {
+ if (*s == 0) *s = ',';
+ else if (*s < 32) *s += '0';
+ }
+ }
+ arg[msg->len] = 0;
+ return arg;
+}
+
+void
+usage(void)
+{
+ sysfatal("Usage: xenstore [lrwdme] path [value]\n");
+}
+
+void
+main(int argc, char *argv[])
+{
+ int fd;
+
+ if (argc != 3 && argc != 4)
+ usage();
+ if(access("/dev/xenstore", AEXIST) < 0)
+ bind("#x", "/dev", MAFTER);
+ fd = open("/dev/xenstore", ORDWR);
+ if (fd < 0)
+ sysfatal("/dev/xenstore: %r");
+ switch (argv[1][0]) {
+ default:
+ usage();
+ break;
+ case 'r':
+ print("%s\n", xscmd(fd, XS_READ, argv[2], 0));
+ break;
+ case 'l':
+ print("%s\n", xscmd(fd, XS_DIRECTORY, argv[2], 0));
+ break;
+ case 'm':
+ print("%s\n", xscmd(fd, XS_MKDIR, argv[2], 0));
+ break;
+ case 'd':
+ print("%s\n", xscmd(fd, XS_RM, argv[2], 0));
+ break;
+ case 'w':
+ if (argc != 4)
+ usage();
+ print("%s\n", xscmd(fd, XS_WRITE, argv[2], argv[3]));
+ break;
+ case 'e':
+ if (argc != 4)
+ usage();
+ print("%s\n", xscmd(fd, XS_WATCH, argv[2], argv[3]));
+ close(fd);
+ fd = open("/dev/xenwatch", OREAD);
+ if (fd < 0)
+ sysfatal("/dev/xenwatch: %r");
+ for (;;)
+ print("%s\n", xscmd(fd, XS_WATCH_EVENT, 0, 0));
+ }
+}
--- /dev/null
+++ b/sys/src/9/xen/xensystem.c
@@ -1,0 +1,541 @@
+/*
+ * xensystem.c
+ *
+ * TODO: we could handle mmu updates more efficiently by
+ * using a multicall.
+ * XXX perhaps we should check return values and panic on failure?
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+
+#define LOG(a)
+
+/*
+ * These functions replace all the inlines that are used on Linux systems
+ */
+
+/* in xen.s */
+int xencall1(int op);
+int xencall2(int op, ulong arg1);
+int xencall3(int op, ulong arg1, ulong arg2);
+int xencall4(int op, ulong arg1, ulong arg2, ulong arg3);
+int xencall5(int op, ulong arg1, ulong arg2, ulong arg3, ulong arg4);
+int xencall6(int op, ulong arg1, ulong arg2, ulong arg3, ulong arg4, ulong arg5);
+
+int
+HYPERVISOR_update_va_mapping(ulong va, uvlong newval, ulong flags)
+{
+ int ret;
+
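+	/* the 64-bit PAE pte value is split into two 32-bit arguments (lo, hi) */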
+ ret = xencall5(__HYPERVISOR_update_va_mapping, va, newval, newval>>32, flags);
+ if(ret < 0)
+ panic("update_va_mapping failed");
+ return ret;
+}
+
+long
+HYPERVISOR_set_timer_op(uvlong timeout)
+{
+ ulong hi, lo;
+
+ hi = timeout>>32;
+ lo = timeout;
+ return xencall3(__HYPERVISOR_set_timer_op, lo, hi);
+}
+
+int
+HYPERVISOR_set_trap_table(trap_info_t *table)
+{
+ return xencall2(__HYPERVISOR_set_trap_table, (ulong)table);
+}
+
+int
+HYPERVISOR_mmu_update(mmu_update_t *req, int count,
+ int *success_count, domid_t domid)
+{
+ return xencall5(__HYPERVISOR_mmu_update, (ulong)req, count, (ulong)success_count, domid);
+}
+
+int
+HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, int *scount, domid_t domid)
+{
+ return xencall5(__HYPERVISOR_mmuext_op, (ulong)op, count, (ulong)scount, domid);
+}
+
+int
+HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
+{
+ return xencall3(__HYPERVISOR_set_gdt, (ulong)frame_list, entries);
+}
+
+int
+HYPERVISOR_stack_switch(ulong ss, ulong esp)
+{
+ return xencall3(__HYPERVISOR_stack_switch, ss, esp);
+}
+
+/* XXX match evfunc and fsfunc prototypes? */
+int
+HYPERVISOR_set_callbacks(ulong evss, ulong evfunc, ulong fsss, ulong fsfunc)
+{
+ return xencall5(__HYPERVISOR_set_callbacks, evss, evfunc, fsss, fsfunc);
+}
+
+int
+HYPERVISOR_fpu_taskswitch(void)
+{
+ return xencall1(__HYPERVISOR_fpu_taskswitch);
+}
+
+int
+HYPERVISOR_yield(void)
+{
+ return xencall3(__HYPERVISOR_sched_op, SCHEDOP_yield, 0);
+}
+
+int
+HYPERVISOR_block(void)
+{
+ return xencall3(__HYPERVISOR_sched_op, SCHEDOP_block, 0);
+}
+
+int
+HYPERVISOR_shutdown(int reboot)
+{
+ sched_shutdown_t arg;
+
+ arg.reason = reboot? SHUTDOWN_reboot : SHUTDOWN_poweroff;
+ return xencall3(__HYPERVISOR_sched_op, SCHEDOP_shutdown, (ulong)&arg);
+}
+
+int
+HYPERVISOR_multicall(void *call_list, int nr_calls)
+{
+ return xencall3(__HYPERVISOR_multicall, (ulong)call_list, nr_calls);
+}
+
+
+int
+HYPERVISOR_event_channel_op(void *op)
+{
+ return xencall2(__HYPERVISOR_event_channel_op, (ulong)op);
+}
+
+int
+HYPERVISOR_xen_version(int cmd, void *arg)
+{
+ return xencall3(__HYPERVISOR_xen_version, cmd, (ulong)arg);
+}
+
+int
+HYPERVISOR_console_io(int cmd, int count, char *str)
+{
+ return xencall4(__HYPERVISOR_console_io, cmd, count, (ulong)str);
+}
+
+int
+HYPERVISOR_grant_table_op(int cmd, gnttab_setup_table_t *setup, int count)
+{
+ return xencall4(__HYPERVISOR_grant_table_op, cmd, (ulong)setup, count);
+}
+
+int
+HYPERVISOR_memory_op(int cmd, struct xen_memory_reservation *arg)
+{
+ return xencall3(__HYPERVISOR_memory_op, cmd, (ulong)arg);
+}
+
+/*
+ * XXX this comment is leftover from old code. revisit and update.
+ *
+ * The use of 'barrier' in the following reflects their use as local-lock
+ * operations. Reentrancy must be prevented (e.g., __cli()) /before/ the
+ * following critical operations are executed. All critical operations must
+ * complete /before/ reentrancy is permitted (e.g., __sti()). The Alpha
+ * architecture also includes these barriers, for example.
+ */
+
+/*
+ * conversions to machine page numbers, pages and addresses
+ */
+#define MFN(pa) (patomfn[(pa)>>PGSHIFT])
+#define MFNPG(pa) ((uvlong)MFN(pa)<<PGSHIFT)
+#define PA2MA(pa) (MFNPG(pa) | PGOFF(pa))
+#define VA2MA(va) PA2MA(PADDR(va))
+#define VA2MFN(va) MFN(PADDR(va))
+
+ulong hypervisor_virt_start;
+ulong xentop;
+start_info_t *xenstart;
+shared_info_t *HYPERVISOR_shared_info;
+ulong *patomfn;
+ulong *matopfn;
+
+int
+xenpdptpin(ulong va)
+{
+ struct mmuext_op op;
+ ulong mfn;
+
+ mfn = MFN(PADDR(va));
+ LOG(dprint("pdptpin %lux %lux\n", va, mfn);)
+ print("pdptpin %lux %lux\n", va, mfn);
+ /* mark page readonly first */
+ HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL);
+
+ /* L3 here refers to page directory pointer table (PAE mode) */
+ op.cmd = MMUEXT_PIN_L3_TABLE;
+ op.arg1.mfn = mfn;
+ if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0)
+ return 1;
+ HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
+ return 0;
+}
+
+int
+xenpgdpin(ulong va)
+{
+ struct mmuext_op op;
+ ulong mfn;
+
+ mfn = MFN(PADDR(va));
+ LOG(dprint("pdpin %lux %lux\n", va, mfn);)
+ /* mark page readonly first */
+ HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL);
+
+ /* to confuse you, L2 here refers to page directories */
+ op.cmd = MMUEXT_PIN_L2_TABLE;
+ op.arg1.mfn = mfn;
+ if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0)
+ return 1;
+ HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
+ return 0;
+}
+
+int
+xenptpin(ulong va)
+{
+ struct mmuext_op op;
+ ulong mfn;
+
+ mfn = MFN(PADDR(va));
+ LOG(dprint("pin %lux %lux\n", va, mfn);)
+ /* mark page readonly first */
+ HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL);
+
+ /* to confuse you, L1 here refers to page tables */
+ op.cmd = MMUEXT_PIN_L1_TABLE;
+ op.arg1.mfn = mfn;
+ if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0)
+ return 1;
+ HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
+ return 0;
+}
+
+void
+xenptunpin(ulong va)
+{
+ struct mmuext_op op;
+ ulong mfn;
+
+ mfn = MFN(PADDR(va));
+ LOG(dprint("unpin %lux %lux\n", va, mfn);)
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.arg1.mfn = mfn;
+ if(HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF)<0)
+ panic("xenptunpin va=%lux called from %lux", va, getcallerpc(&va));
+
+ /* mark page read-write */
+ HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
+}
+
+void
+xenptswitch(ulong pa)
+{
+ struct mmuext_op op;
+
+ op.cmd = MMUEXT_NEW_BASEPTR;
+ op.arg1.mfn = MFN(pa);
+ if(HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF)<0)
+ panic("xenptswitch");
+}
+
+void
+xentlbflush(void)
+{
+ struct mmuext_op op;
+
+ op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
+ HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF);
+}
+
+/* update a pte using a machine page frame number */
+void
+xenupdatema(ulong *ptr, uvlong val)
+{
+ mmu_update_t u;
+
+ u.ptr = VA2MA(ptr);
+ u.val = val;
+ if(HYPERVISOR_mmu_update(&u, 1, 0, DOMID_SELF) < 0)
+ panic("xenupdatema - pte %lux value %llux (was %llux) called from %lux", (ulong)ptr, val, *(uvlong*)ptr, getcallerpc(&ptr));
+}
+
+/* update a pte using a guest "physical" page number */
+void
+xenupdate(ulong *ptr, ulong val)
+{
+ mmu_update_t u;
+
+ u.ptr = VA2MA(ptr);
+ u.val = PA2MA(val);
+ if(HYPERVISOR_mmu_update(&u, 1, 0, DOMID_SELF) < 0)
+ panic("xenupdate - pte %lux value %lux (%llux) called from %lux", (ulong)ptr, val, PA2MA(val), getcallerpc(&ptr));
+}
+
+void
+acceptframe(int ref, void *va)
+{
+ ulong mfn;
+
+ mfn = xengrantend(ref);
+ if (mfn == 0)
+ panic("can't accept page frame");
+ LOG(dprint("acceptframe ref %d va %lux mfn %lux\n", ref, (ulong)va, mfn);)
+ VA2MFN(va) = mfn;
+ mmumapframe((ulong)va, mfn);
+}
+
+int
+donateframe(int domid, void *va)
+{
+ ulong mfn;
+ int ref;
+ ulong *pte;
+ struct xen_memory_reservation mem;
+
+ mfn = VA2MFN(va);
+ ref = xengrant(domid, mfn, GTF_accept_transfer);
+ LOG(dprint("grant transfer %lux (%lux) -> %d\n", (ulong)va, mfn, ref);)
+ pte = mmuwalk(m->pdb, (ulong)va, 2, 0);
+ xenupdatema(pte, 0);
+ set_xen_guest_handle(mem.extent_start, &mfn);
+ mem.nr_extents = 1;
+ mem.extent_order = 0;
+ mem.address_bits = 0;
+ mem.domid = DOMID_SELF;
+ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, &mem) != 1)
+ panic("XENMEM_decrease_reservation");
+ VA2MFN(va) = ~0;
+ return ref;
+}
+
+int
+shareframe(int domid, void *va, int write)
+{
+ ulong mfn;
+ int ref;
+ int flags;
+
+ mfn = VA2MFN(va);
+ flags = GTF_permit_access;
+ if (!write)
+ flags |= GTF_readonly;
+ ref = xengrant(domid, mfn, flags);
+ LOG(dprint("grant shared %lux (%lux) -> %d\n", (ulong)va, mfn, ref);)
+ return ref;
+}
+
+/*
+ * Upcall from hypervisor, entered with evtchn_upcall_pending masked.
+ */
+void
+xenupcall(Ureg *ureg)
+{
+ vcpu_info_t *vcpu;
+ shared_info_t *s;
+ ulong sel1, sel2, n1, n2, port;
+
+ ureg->ecode = 0;
+ s = HYPERVISOR_shared_info;
+ vcpu = &HYPERVISOR_shared_info->vcpu_info[0];
+ for (;;) {
+ vcpu->evtchn_upcall_pending = 0;
+ sel1 = xchgl((uint*)&vcpu->evtchn_pending_sel, 0);
+ while(sel1) {
+ n1 = ffs(sel1);
+ sel1 &= ~(1<<n1);
+ sel2 = xchgl((uint*)&s->evtchn_pending[n1], 0);
+ while(sel2) {
+ n2 = ffs(sel2);
+ sel2 &= ~(1<<n2);
+ port = (n1<<5) + n2;
+ ureg->trap = 100+port;
+ trap(ureg);
+ }
+ }
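+		/*
+		 * Unmask only after a clean scan, then check pending again:
+		 * an event arriving between the scan and the unmask would
+		 * otherwise be missed until the next upcall.
+		 */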
+ if (vcpu->evtchn_upcall_pending)
+ continue;
+ vcpu->evtchn_upcall_mask = 0;
+ if (vcpu->evtchn_upcall_pending == 0)
+ break;
+ vcpu->evtchn_upcall_mask = 1;
+ }
+}
+
+/*
+ * tbdf field is abused to distinguish virqs from channels:
+ *
+ * tbdf=BUSUNKNOWN -> irq is a virq to be bound to a channel
+ * tbdf=0 -> irq is a channel number
+ */
+int
+xenintrenable(Vctl *v)
+{
+ evtchn_op_t op;
+ uint port;
+
+ /* XXX locking? */
+ if (v->tbdf != BUSUNKNOWN) {
+ op.cmd = EVTCHNOP_bind_virq;
+ op.u.bind_virq.virq = v->irq;
+ op.u.bind_virq.vcpu = m->machno;
+ if(HYPERVISOR_event_channel_op(&op) != 0)
+ panic("xenintrenable: bind %d failed", v->irq);
+ port = op.u.bind_virq.port;
+ } else
+ port = v->irq;
+ if (port > 155)
+ return -1;
+ HYPERVISOR_shared_info->evtchn_mask[port/32] &= ~(1<<(port%32));
+ if(0)print("xenintrenable %s: irq %d port %d mask[%d] = %#lux\n", v->name, v->irq, port, port/32, HYPERVISOR_shared_info->evtchn_mask[port/32]);
+ return 100+port;
+}
+
+int
+xenintrdisable(int irq)
+{
+ USED(irq);
+ panic("xenintrdisable notyet\n");
+ return 0;
+}
+
+int
+xenintrvecno(int irq)
+{
+ return irq;
+}
+
+int
+islo(void)
+{
+ vcpu_info_t *cpu;
+
+ cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno]; // XXX m->shared
+ return (cpu->evtchn_upcall_mask == 0);
+}
+
+/*
+ * Note: Portable code expects spllo <= spl* <= spldone for
+ * accounting purposes. Lets hope the compiler doesn't reorder
+ * us.
+ */
+int
+spllo(void)
+{
+ vcpu_info_t *cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno]; // XXX m->shared
+
+ if(cpu->evtchn_upcall_mask == 0)
+ return 0;
+ m->splpc = 0;
+ cpu->evtchn_upcall_mask = 0;
+
+ /*
+ * If an event arrived while masked off,
+ * use a dummy call to trigger delivery
+ */
+ if (cpu->evtchn_upcall_pending)
+ HYPERVISOR_xen_version(0, 0);
+
+ return 1;
+}
+
+int
+splhi(void)
+{
+ ulong dummy;
+ vcpu_info_t *cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno]; // XXX m->shared
+ int oldmask;
+
+ oldmask = xchgb(&cpu->evtchn_upcall_mask, 1);
+ if (cpu->evtchn_upcall_mask != 1)
+ panic("xchgb");
+	/* XXX ad-hoc "getcallerpc" because we have no arguments */
+ m->splpc = (&dummy)[1];
+ return oldmask;
+}
+
+void
+splx(int x)
+{
+ if(x)
+ splhi();
+ else
+ spllo();
+}
+
+/* marker for profiling in portable code */
+void
+spldone(void)
+{
+}
+
+/* allocate an event channel */
+int
+xenchanalloc(int dom)
+{
+ evtchn_op_t op;
+
+ op.cmd = EVTCHNOP_alloc_unbound;
+ op.u.alloc_unbound.dom = DOMID_SELF;
+ op.u.alloc_unbound.remote_dom = dom;
+ if (HYPERVISOR_event_channel_op(&op) != 0)
+ panic("xenchanalloc");
+ return op.u.alloc_unbound.port;
+}
+
+/* notify over an event channel */
+void
+xenchannotify(int port)
+{
+ evtchn_op_t op;
+
+ op.cmd = EVTCHNOP_send;
+ op.u.send.port = port;
+ HYPERVISOR_event_channel_op(&op);
+}
+
+void
+halt(void)
+{
+ extern int nrdy;
+
+ splhi();
+ if (nrdy) {
+ spllo();
+ return;
+ }
+ HYPERVISOR_block();
+}
+
+void
+mb(void)
+{
+ coherence();
+}
--- /dev/null
+++ b/sys/src/9/xen/xentimer.c
@@ -1,0 +1,136 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+static vcpu_time_info_t shadow[MAX_VIRT_CPUS]; // XXX should be in Mach
+static ulong wallclock;
+static ulong wallclocksystime;
+
+/*
+ * Return a consistent set of time parameters.
+ */
+static vcpu_time_info_t *
+getshadow(void)
+{
+ vcpu_time_info_t *s, *t;
+
+ t = &HYPERVISOR_shared_info->vcpu_info[m->machno].time;
+ s = &shadow[m->machno]; // XXX place in mach struct
+ while(t->version != s->version) {
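+		/* an odd version means an update is in progress; retry until it settles */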
+ if (t->version&1)
+ continue;
+ s->version = t->version;
+ s->tsc_timestamp = t->tsc_timestamp;
+ s->system_time = t->system_time;
+ s->tsc_to_system_mul = t->tsc_to_system_mul;
+ s->tsc_shift = t->tsc_shift;
+ }
+ return s;
+}
+
+/* just get it from the shared info */
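+/*
+ * Xen scales TSC deltas to nanoseconds as
+ *	ns = ((tsc << tsc_shift) * tsc_to_system_mul) >> 32,
+ * so the TSC rate in Hz is (10^9 << 32) / tsc_to_system_mul, corrected
+ * by tsc_shift as below.
+ */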
+void
+guesscpuhz(int) // XXX no arg!
+{
+ vcpu_time_info_t *t;
+
+ t = getshadow();
+ m->cpuhz = (1000000000LL << 32) / t->tsc_to_system_mul;
+ if(t->tsc_shift < 0)
+ m->cpuhz <<= -t->tsc_shift;
+ else
+ m->cpuhz >>= t->tsc_shift;
+ m->cpumhz = m->cpuhz / 1000000L;
+}
+
+void
+xentimerset(uvlong next)
+{
+ uvlong soon;
+
+ soon = fastticks(0) + 100000;
+ if (next < soon)
+ next = soon;
+ HYPERVISOR_set_timer_op(next);
+}
+
+void
+xentimerclock(Ureg* ureg, void*)
+{
+
+}
+
+void
+xentimerenable(void)
+{
+ intrenable(VIRQ_TIMER, xentimerclock, nil, 0, "Xen Timer");
+}
+
+uvlong
+xentimerread(uvlong *hz)
+{
+ uvlong x;
+ uvlong delta, sdelta;
+ vcpu_time_info_t *t;
+
+ t = getshadow();
+ cycles(&x);
+ delta = x - t->tsc_timestamp;
+ if (t->tsc_shift < 0)
+ delta >>= -t->tsc_shift;
+ else
+ delta <<= t->tsc_shift;
+ mul64fract(&sdelta, delta, t->tsc_to_system_mul);
+ x = t->system_time + sdelta;
+ if (HYPERVISOR_shared_info->wc_sec != wallclock) {
+ wallclock = HYPERVISOR_shared_info->wc_sec;
+ wallclocksystime = x;
+ }
+ if (hz)
+ *hz = 1000000000;
+ return x;
+}
+
+ulong
+xenwallclock(void)
+{
+ ulong elapsed;
+
+ elapsed = (ulong)((xentimerread(0) - wallclocksystime)/1000000000);
+ return wallclock + elapsed;
+}
+
+void
+microdelay(int microsecs)
+{
+ uvlong targ, hz;
+
+ targ = xentimerread(&hz);
+ targ += microsecs * hz / 1000000;
+ while(xentimerread(0) < targ)
+ continue;
+}
+
+void
+delay(int millisecs)
+{
+ microdelay(millisecs * 1000);
+}
+
+/*
+ * performance measurement ticks. must be low overhead.
+ * doesn't have to count over a second.
+ */
+ulong
+perfticks(void)
+{
+ uvlong x;
+
+ cycles(&x);
+ return x;
+}