shithub: riscv

Download patch

ref: 56343cafcfb47a4ef3fff0d6a8e3220ecd93518b
parent: 28ad4e661610353efec655fdf147a46e156bf46e
author: cinap_lenrek <[email protected]>
date: Sat Feb 1 05:25:10 EST 2014

add experimental pc64 kernel

--- /dev/null
+++ b/sys/src/9/pc64/apbootstrap.s
@@ -1,0 +1,169 @@
+/*
+ * Start an Application Processor. This must be placed on a 4KB boundary
+ * somewhere in the 1st MB of conventional memory (APBOOTSTRAP). However,
+ * due to some shortcuts below it's restricted further to within the 1st
+ * 64KB. The AP starts in real-mode, with
+ *   CS selector set to the startup memory address/16;
+ *   CS base set to startup memory address;
+ *   CS limit set to 64KB;
+ *   CPL and IP set to 0.
+ */
+#include "mem.h"
+
+#define NOP		BYTE $0x90		/* NOP */
+
+#define pFARJMP32(s, o)	BYTE $0xea;		/* far jmp ptr32:16 */	\
+			LONG $o; WORD $s
+#define rFARJMP16(s, o)	BYTE $0xea;		/* far jump ptr16:16 */	\
+			WORD $o; WORD $s;
+#define rFARJMP32(s, o)	BYTE $0x66;		/* far jump ptr32:16 */	\
+			pFARJMP32(s, o)
+
+#define rLGDT(gdtptr)	BYTE $0x0f;		/* LGDT */		\
+			BYTE $0x01; BYTE $0x16;				\
+			WORD $gdtptr
+
+#define rMOVAX(i)	BYTE $0xb8;		/* i -> AX */		\
+			WORD $i;
+
+#define	DELAY		BYTE $0xEB;		/* JMP .+2 */			\
+			BYTE $0x00
+
+MODE $16
+
+TEXT apbootstrap(SB), 1, $-4 			/* entry point; the AP arrives here in real mode */
+	rFARJMP16(0, _apbootstrap-KZERO(SB))	/* 5-byte far jump over the parameter slots, CS := 0 */
+	NOP; NOP; NOP;				/* pad to 8 bytes so the slots land on the offsets noted below */
+TEXT _apvector(SB), 1, $-4 			/* address APBOOTSTRAP+0x08 */
+	QUAD $0					/* function _ap64 calls; zero here, so it has to be filled in before the AP runs */
+TEXT _appml4(SB), 1, $-4 			/* address APBOOTSTRAP+0x10 */
+	QUAD $0					/* value _ap32 loads into CR3 */
+TEXT _apapic(SB), 1, $-4 			/* address APBOOTSTRAP+0x18 */
+	QUAD $0					/* argument _ap64 hands to *_apvector */
+TEXT _apmach(SB), 1, $-4 			/* address APBOOTSTRAP+0x20 */
+	QUAD $0					/* value _ap64 loads into RMACH and uses as the stack base */
+TEXT _apbootstrap(SB), 1, $-4 			/* real mode: load a GDT and enter protected mode */
+	MOVW	CS, AX
+	MOVW	AX, DS				/* initialise DS */
+
+	rLGDT(_gdtptr32p<>-KZERO(SB))		/* load a basic gdt */
+
+	MOVL	CR0, AX
+	ORL	$1, AX				/* CR0 bit 0: Protection Enable */
+	MOVL	AX, CR0				/* turn on protected mode */
+	DELAY					/* JMP .+2 */
+
+	rFARJMP16(SELECTOR(3, SELGDT, 0), _ap32-KZERO(SB))	/* reload CS; entry 3 of _gdt is the 32-bit exec segment */
+
+/*
+ * Enable and activate Long Mode. From the manual:
+ * 	make sure Page Size Extensions are off, and Page Global
+ *	Extensions and Physical Address Extensions are on in CR4;
+ *	set Long Mode Enable in the Extended Feature Enable MSR;
+ *	set Paging Enable in CR0;
+ *	make an inter-segment jump to the Long Mode code.
+ * It's all in 32-bit mode until the jump is made.
+ */
+MODE $32
+
+TEXT _ap32(SB), 1, $-4				/* 32-bit protected mode: enable paging and long mode */
+	MOVW	$SELECTOR(2, SELGDT, 0), AX	/* entry 2 of _gdt is the 32-bit data segment */
+	MOVW	AX, DS
+	MOVW	AX, ES
+	MOVW	AX, FS
+	MOVW	AX, GS
+	MOVW	AX, SS
+
+	MOVL	_appml4-KZERO(SB), AX	/* physical address of PML4 (MOVL: only the low 32 bits of the slot) */
+	MOVL	AX, CR3			/* load the mmu */
+	DELAY
+
+	MOVL	CR4, AX
+	ANDL	$~0x00000010, AX		/* Page Size */
+	ORL	$0x000000A0, AX			/* Page Global, Phys. Address */
+	MOVL	AX, CR4
+
+	MOVL	$0xc0000080, CX			/* Extended Feature Enable */
+	RDMSR
+	ORL	$0x00000100, AX			/* Long Mode Enable */
+	WRMSR
+
+	MOVL	CR0, DX
+	ANDL	$~0x6000000a, DX		/* clear Cache Disable, Not Write-through, Task Switched, Monitor Coprocessor */
+	ORL	$0x80010000, DX			/* Paging Enable, Write Protect */
+	MOVL	DX, CR0
+
+	pFARJMP32(SELECTOR(KESEG, SELGDT, 0), _ap64-KZERO(SB))	/* far jump into the 64-bit code segment */
+
+/*
+ * Long mode. Welcome to 2003.
+ * Jump out of the identity map space;
+ * load a proper long mode GDT;
+ * zap the identity map;
+ * initialise the stack and call the
+ * C startup code in m->splpc.
+ */
+MODE $64
+
+TEXT _ap64(SB), 1, $-4				/* first 64-bit code run by an AP */
+	MOVQ	$_gdtptr64v<>(SB), AX		/* descriptor whose base is _gdt without KZERO subtracted */
+	MOVL	(AX), GDTR
+
+	XORQ	AX, AX				/* AX stays 0 until _apvector is loaded below */
+	MOVW	AX, DS				/* not used in long mode */
+	MOVW	AX, ES				/* not used in long mode */
+	MOVW	AX, FS
+	MOVW	AX, GS
+	MOVW	AX, SS				/* not used in long mode */
+
+	MOVW	AX, LDTR			/* null selector: no LDT */
+
+	MOVQ	_apmach(SB), SP			/* parameter slot at APBOOTSTRAP+0x20 */
+
+	MOVQ	AX, RUSER			/* up = 0; */
+	MOVQ	SP, RMACH			/* m = apmach */
+
+	ADDQ	$MACHSIZE, SP			/* stack starts MACHSIZE bytes above m */
+
+	PUSHQ	AX				/* clear flags */
+	POPFQ
+
+	MOVQ	_apvector(SB), AX		/* parameter slot at APBOOTSTRAP+0x08 */
+	MOVQ	_apapic(SB), RARG		/* parameter slot at APBOOTSTRAP+0x18, passed as the argument */
+	PUSHQ	RARG				/* the same value is also pushed on the stack */
+
+	CALL	*AX
+
+_halt:						/* reached only if the call above returns */
+	HLT
+	JMP _halt
+	
+TEXT _gdt<>(SB), 1, $-4				/* boot GDT: 4 descriptors, two LONGs each */
+	/* null descriptor */
+	LONG	$0
+	LONG	$0
+
+	/* (KESEG) 64 bit long mode exec segment */
+	LONG	$(0xFFFF)
+	LONG	$(SEGL|SEGG|SEGP|(0xF<<16)|SEGPL(0)|SEGEXEC|SEGR)
+
+	/* 32 bit data segment descriptor for 4 gigabytes (PL 0) */
+	LONG	$(0xFFFF)
+	LONG	$(SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(0)|SEGDATA|SEGW)
+
+	/* 32 bit exec segment descriptor for 4 gigabytes (PL 0) */
+	LONG	$(0xFFFF)
+	LONG	$(SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(0)|SEGEXEC|SEGR)
+
+
+TEXT _gdtptr32p<>(SB), 1, $-4			/* loaded by rLGDT in _apbootstrap */
+	WORD	$(4*8-1)				/* limit: 4 descriptors of 8 bytes, less 1 */
+	LONG	$_gdt<>-KZERO(SB)			/* 32-bit base: _gdt with KZERO subtracted */
+
+TEXT _gdtptr64p<>(SB), 1, $-4			/* not referenced by the code visible here */
+	WORD	$(4*8-1)				/* limit: 4 descriptors of 8 bytes, less 1 */
+	QUAD	$_gdt<>-KZERO(SB)			/* 64-bit base: _gdt with KZERO subtracted */
+
+TEXT _gdtptr64v<>(SB), 1, $-4			/* loaded in _ap64 */
+	WORD	$(4*8-1)				/* limit: 4 descriptors of 8 bytes, less 1 */
+	QUAD	$_gdt<>(SB)				/* 64-bit base: _gdt at its linked address */
--- /dev/null
+++ b/sys/src/9/pc64/dat.h
@@ -1,0 +1,360 @@
+typedef struct BIOS32si	BIOS32si;
+typedef struct BIOS32ci	BIOS32ci;
+typedef struct Conf	Conf;
+typedef struct Confmem	Confmem;
+typedef union  FPsave	FPsave;
+typedef struct Fxsave	Fxsave;
+typedef struct FPstate	FPstate;
+typedef struct ISAConf	ISAConf;
+typedef struct Label	Label;
+typedef struct Lock	Lock;
+typedef struct MMU	MMU;
+typedef struct Mach	Mach;
+typedef struct Notsave	Notsave;
+typedef struct PCArch	PCArch;
+typedef struct Pcidev	Pcidev;
+typedef struct PCMmap	PCMmap;
+typedef struct PCMslot	PCMslot;
+typedef struct Page	Page;
+typedef struct PMMU	PMMU;
+typedef struct Proc	Proc;
+typedef struct Segdesc	Segdesc;
+typedef vlong		Tval;
+typedef struct Ureg	Ureg;
+typedef struct Vctl	Vctl;
+
+#pragma incomplete BIOS32si
+#pragma incomplete Pcidev
+#pragma incomplete Ureg
+
+#define MAXSYSARG	5	/* for mount(fd, afd, mpt, flag, arg) */
+
+/*
+ *  parameters for sysproc.c
+ */
+#define AOUT_MAGIC	(S_MAGIC)
+
+struct Lock
+{
+	ulong	key;
+	ulong	sr;
+	uintptr	pc;
+	Proc	*p;
+	Mach	*m;
+	ushort	isilock;
+	long	lockcycles;
+};
+
+struct Label
+{
+	uintptr	sp;
+	uintptr	pc;
+};
+
+/*
+ * FPsave.status
+ */
+enum
+{
+	/* this is a state */
+	FPinit=		0,
+	FPactive=	1,
+	FPinactive=	2,
+
+	/* the following is a bit that can be or'd into the state */
+	FPillegal=	0x100,
+};
+
+/*
+ * the FP regs must be stored here, not somewhere pointed to from here.
+ * port code assumes this.
+ */
+struct Fxsave {				/* the field sizes below add up to 512 bytes */
+	u16int	fcw;			/* x87 control word */
+	u16int	fsw;			/* x87 status word */
+	u8int	ftw;			/* x87 tag word */
+	u8int	zero;			/* 0 */
+	u16int	fop;			/* last x87 opcode */
+	u64int	rip;			/* last x87 instruction pointer */
+	u64int	rdp;			/* last x87 data pointer */
+	u32int	mxcsr;			/* SSE control and status (MXCSR) */
+	u32int	mxcsrmask;		/* mask of supported MXCSR bits */
+	uchar	st[128];		/* shared 64-bit media and x87 regs */
+	uchar	xmm[256];		/* 128-bit media regs */
+	uchar	ign[96];		/* reserved, ignored */
+};
+
+union FPsave {
+	uchar align[512+15];	/* 15 bytes of slack beyond the 512-byte Fxsave — presumably so the save area can be 16-byte aligned; confirm against the users */
+	Fxsave;			/* unnamed member: the Fxsave fields are reached directly through FPsave */
+};
+
+struct Confmem
+{
+	uintptr	base;
+	ulong	npage;
+	uintptr	kbase;
+	uintptr	klimit;
+};
+
+struct Conf
+{
+	ulong	nmach;		/* processors */
+	ulong	nproc;		/* processes */
+	ulong	monitor;	/* has monitor? */
+	Confmem	mem[4];		/* physical memory */
+	ulong	npage;		/* total physical pages of memory */
+	ulong	upages;		/* user page pool */
+	ulong	nimage;		/* number of page cache image headers */
+	ulong	nswap;		/* number of swap pages */
+	int	nswppo;		/* max # of pageouts per segment pass */
+	ulong	copymode;	/* 0 is copy on write, 1 is copy on reference */
+	ulong	ialloc;		/* max interrupt time allocation in bytes */
+	ulong	pipeqsize;	/* size in bytes of pipe queues */
+	int	nuart;		/* number of uart devices */
+};
+
+struct Segdesc
+{
+	u32int	d0;
+	u32int	d1;
+};
+
+/*
+ *  MMU structure for PDP, PD, PT pages.
+ */
+struct MMU
+{
+	MMU	*next;
+	uintptr	*page;
+	int	index;
+	int	level;
+};
+
+/*
+ *  MMU stuff in proc
+ */
+#define NCOLOR 1
+struct PMMU
+{
+	MMU	*mmuhead;
+	MMU	*mmutail;
+	int	mmucount;
+};
+
+/*
+ *  things saved in the Proc structure during a notify
+ */
+struct Notsave
+{
+	ulong	svflags;
+	ulong	svcs;
+	ulong	svss;
+};
+
+#include "../port/portdat.h"
+
+typedef struct {			/* 104 bytes; the _N_ pad fields are named for their byte offsets */
+	u32int	_0_;
+	u32int	rsp0[2];		/* starts at offset 4, so kept as two u32int halves rather than one u64int */
+	u32int	rsp1[2];
+	u32int	rsp2[2];
+	u32int	_28_[2];
+	u32int	ist[14];		/* 14 u32ints, i.e. seven lo/hi pairs, at offset 36 */
+	u16int	_92_[5];
+	u16int	iomap;			/* at offset 102 */
+} Tss;
+
+struct Mach
+{
+	int	machno;			/* physical id of processor (KNOWN TO ASSEMBLY) */
+	uintptr	splpc;			/* pc of last caller to splhi (KNOWN TO ASSEMBLY: 8(RMACH) in l.s) */
+
+	Proc*	proc;			/* current process on this processor (KNOWN TO ASSEMBLY: 16(RMACH) in l.s) */
+
+	u64int*	pml4;			/* pml4 base for this processor (va) */
+	Tss*	tss;			/* tss for this processor */
+	Segdesc	*gdt;			/* gdt for this processor */
+
+	u64int	mmumap[4];		/* bitmap of pml4 entries for zapping */
+	MMU*	mmufree;		/* freelist for MMU structures */
+	int	mmucount;		/* number of MMU structures in freelist */
+	int	kmapindex;		/* next KMAP page index for use */
+
+	ulong	ticks;			/* of the clock since boot time */
+	Label	sched;			/* scheduler wakeup */
+	Lock	alarmlock;		/* access to alarm list */
+	void*	alarm;			/* alarms bound to this clock */
+	int	inclockintr;
+
+	Proc*	readied;		/* for runproc */
+	ulong	schedticks;		/* next forced context switch */
+
+	int	tlbfault;
+	int	tlbpurge;
+	int	pfault;
+	int	cs;
+	int	syscall;
+	int	load;
+	int	intr;
+	int	flushmmu;		/* make current proc flush its mmu state */
+	int	ilockdepth;
+	Perf	perf;			/* performance counters */
+
+	ulong	spuriousintr;
+	int	lastintr;
+
+	int	loopconst;
+
+	int	cpumhz;
+	uvlong	cyclefreq;		/* Frequency of user readable cycle counter */
+	uvlong	cpuhz;
+	int	cpuidax;
+	int	cpuidcx;
+	int	cpuiddx;
+	char	cpuidid[16];
+	char*	cpuidtype;
+	int	havetsc;
+	int	havepge;
+	uvlong	tscticks;
+	int	pdballoc;
+	int	pdbfree;
+
+	vlong	mtrrcap;
+	vlong	mtrrdef;
+	vlong	mtrrfix[11];
+	vlong	mtrrvar[32];		/* 256 max. */
+
+	uintptr	stack[1];		/* last member; l.s starts SP at the Mach address + MACHSIZE, so presumably the stack fills the rest of that area */
+};
+
+/*
+ * KMap the structure
+ */
+typedef void KMap;			/* KMap* is what kmap() returns and kunmap() takes (fns.h) */
+#define	VA(k)		((void*)(k))	/* argument parenthesised so an expression argument is cast as a whole */
+
+struct
+{
+	Lock;
+	int	machs;			/* bitmap of active CPUs */
+	int	exiting;		/* shutdown */
+	int	ispanic;		/* shutdown in response to a panic */
+	int	thunderbirdsarego;	/* lets the added processors continue to schedinit */
+}active;
+
+/*
+ *  routines for things outside the PC model, like power management
+ */
+struct PCArch
+{
+	char*	id;
+	int	(*ident)(void);		/* this should be in the model */
+	void	(*reset)(void);		/* this should be in the model */
+	int	(*serialpower)(int);	/* 1 == on, 0 == off */
+	int	(*modempower)(int);	/* 1 == on, 0 == off */
+
+	void	(*intrinit)(void);
+	int	(*intrenable)(Vctl*);
+	int	(*intrvecno)(int);
+	int	(*intrdisable)(int);
+	void	(*introff)(void);
+	void	(*intron)(void);
+
+	void	(*clockenable)(void);
+	uvlong	(*fastclock)(uvlong*);
+	void	(*timerset)(uvlong);
+};
+
+/* cpuid instruction result register bits */
+enum {
+	/* cx */
+	Monitor	= 1<<3,
+
+	/* dx */
+	Fpuonchip = 1<<0,
+	Vmex	= 1<<1,		/* virtual-mode extensions */
+	Pse	= 1<<3,		/* page size extensions */
+	Tsc	= 1<<4,		/* time-stamp counter */
+	Cpumsr	= 1<<5,		/* model-specific registers, rdmsr/wrmsr */
+	Pae	= 1<<6,		/* physical-addr extensions */
+	Mce	= 1<<7,		/* machine-check exception */
+	Cmpxchg8b = 1<<8,
+	Cpuapic	= 1<<9,
+	Mtrr	= 1<<12,	/* memory-type range regs.  */
+	Pge	= 1<<13,	/* page global extension */
+	Pse2	= 1<<17,	/* more page size extensions */
+	Clflush = 1<<19,
+	Acpif	= 1<<22,	/* therm control msr */
+	Mmx	= 1<<23,
+	Fxsr	= 1<<24,	/* have SSE FXSAVE/FXRSTOR */
+	Sse	= 1<<25,	/* thus sfence instr. */
+	Sse2	= 1<<26,	/* thus mfence & lfence instr.s */
+	Rdrnd	= 1<<30,	/* RDRAND support bit; NOTE(review): cpuid 1 reports this in cx, not dx — check the callers */
+};
+
+enum {						/* MSRs */
+	PerfEvtbase	= 0xc0010000,		/* Performance Event Select */
+	PerfCtrbase	= 0xc0010004,		/* Performance Counters */
+
+	Efer		= 0xc0000080,		/* Extended Feature Enable */
+	Star		= 0xc0000081,		/* Legacy Target IP and [CS]S */
+	Lstar		= 0xc0000082,		/* Long Mode Target IP */
+	Cstar		= 0xc0000083,		/* Compatibility Target IP */
+	Sfmask		= 0xc0000084,		/* SYSCALL Flags Mask */
+	FSbase		= 0xc0000100,		/* 64-bit FS Base Address */
+	GSbase		= 0xc0000101,		/* 64-bit GS Base Address */
+	KernelGSbase	= 0xc0000102,		/* SWAPGS instruction */
+};
+
+/*
+ *  a parsed plan9.ini line
+ */
+#define NISAOPT		8
+
+struct ISAConf {
+	char	*type;
+	ulong	port;
+	int	irq;
+	ulong	dma;
+	ulong	mem;
+	ulong	size;
+	ulong	freq;
+
+	int	nopt;
+	char	*opt[NISAOPT];
+};
+
+extern PCArch	*arch;			/* PC architecture */
+
+Mach* machp[MAXMACH];
+	
+#define	MACHP(n)	(machp[n])
+
+extern register Mach* m;			/* R15 */
+extern register Proc* up;			/* R14 */
+
+/*
+ *  hardware info about a device
+ */
+typedef struct {
+	ulong	port;	
+	int	size;
+} Devport;
+
+struct DevConf
+{
+	ulong	intnum;			/* interrupt number */
+	char	*type;			/* card type, malloced */
+	int	nports;			/* Number of ports */
+	Devport	*ports;			/* The ports themselves */
+};
+
+typedef struct BIOS32ci {		/* BIOS32 Calling Interface */
+	u32int	eax;
+	u32int	ebx;
+	u32int	ecx;
+	u32int	edx;
+	u32int	esi;
+	u32int	edi;
+} BIOS32ci;
--- /dev/null
+++ b/sys/src/9/pc64/fns.h
@@ -1,0 +1,189 @@
+#include "../port/portfns.h"
+
+void	aamloop(int);
+Dirtab*	addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong));
+void	archinit(void);
+int	bios32call(BIOS32ci*, u16int[3]);
+int	bios32ci(BIOS32si*, BIOS32ci*);
+void	bios32close(BIOS32si*);
+BIOS32si* bios32open(char*);
+void	bootargs(void*);
+uintptr	cankaddr(uintptr);
+int	checksum(void *, int);
+void	clockintr(Ureg*, void*);
+int	(*cmpswap)(long*, long, long);
+int	cmpswap486(long*, long, long);
+void	(*coherence)(void);
+void	cpuid(int, ulong regs[]);
+int	cpuidentify(void);
+void	cpuidprint(void);
+void	(*cycles)(uvlong*);
+void	delay(int);
+void*	dmabva(int);
+int	dmacount(int);
+int	dmadone(int);
+void	dmaend(int);
+int	dmainit(int, int);
+#define DMAWRITE 0
+#define DMAREAD 1
+#define DMALOOP 2
+long	dmasetup(int, void*, long, int);
+#define	evenaddr(x)				/* x86 doesn't care */
+void	(*fprestore)(FPsave*);
+void	(*fpsave)(FPsave*);
+void	fpsserestore(FPsave*);
+void	fpssesave(FPsave*);
+void	fpx87restore(FPsave*);
+void	fpx87save(FPsave*);
+u64int	getcr0(void);
+u64int	getcr2(void);
+u64int	getcr3(void);
+u64int	getcr4(void);
+char*	getconf(char*);
+void	guesscpuhz(int);
+void	halt(void);
+void	mwait(void*);
+int	i8042auxcmd(int);
+int	i8042auxcmds(uchar*, int);
+void	i8042auxenable(void (*)(int, int));
+void	i8042reset(void);
+void	i8250console(void);
+void*	i8250alloc(int, int, int);
+void	i8253enable(void);
+void	i8253init(void);
+void	i8253reset(void);
+uvlong	i8253read(uvlong*);
+void	i8253timerset(uvlong);
+int	i8259disable(int);
+int	i8259enable(Vctl*);
+void	i8259init(void);
+int	i8259isr(int);
+void	i8259on(void);
+void	i8259off(void);
+int	i8259vecno(int);
+void	idle(void);
+void	idlehands(void);
+int	inb(int);
+void	insb(int, void*, int);
+ushort	ins(int);
+void	inss(int, void*, int);
+ulong	inl(int);
+void	insl(int, void*, int);
+int	intrdisable(int, void (*)(Ureg *, void *), void*, int, char*);
+void	intrenable(int, void (*)(Ureg*, void*), void*, int, char*);
+void	introff(void);
+void	intron(void);
+void	invlpg(uintptr);
+void	iofree(int);
+void	ioinit(void);
+int	iounused(int, int);
+int	ioalloc(int, int, int, char*);
+int	ioreserve(int, int, int, char*);
+int	iprint(char*, ...);
+int	isaconfig(char*, int, ISAConf*);
+void*	kaddr(uintptr);
+void	kbdenable(void);
+void	kbdinit(void);
+KMap*	kmap(Page*);
+void	kunmap(KMap*);
+#define	kmapinval()
+void	lgdt(void*);
+void	lidt(void*);
+void	links(void);
+void	ltr(ulong);
+void	mach0init(void);
+void	mathinit(void);
+void	mb386(void);
+void	mb586(void);
+void	meminit(void);
+void	memorysummary(void);
+void	mfence(void);
+#define mmuflushtlb() putcr3(getcr3())
+void	mmuinit(void);
+uintptr	*mmuwalk(uintptr*, uintptr, int, int);
+int	mtrr(uvlong, uvlong, char *);
+void	mtrrclock(void);
+int	mtrrprint(char *, long);
+uchar	nvramread(int);
+void	nvramwrite(int, uchar);
+void	outb(int, int);
+void	outsb(int, void*, int);
+void	outs(int, ushort);
+void	outss(int, void*, int);
+void	outl(int, ulong);
+void	outsl(int, void*, int);
+uintptr	paddr(void*);
+ulong	pcibarsize(Pcidev*, int);
+void	pcibussize(Pcidev*, ulong*, ulong*);
+int	pcicfgr8(Pcidev*, int);
+int	pcicfgr16(Pcidev*, int);
+int	pcicfgr32(Pcidev*, int);
+void	pcicfgw8(Pcidev*, int, int);
+void	pcicfgw16(Pcidev*, int, int);
+void	pcicfgw32(Pcidev*, int, int);
+void	pciclrbme(Pcidev*);
+void	pciclrioe(Pcidev*);
+void	pciclrmwi(Pcidev*);
+int	pcigetpms(Pcidev*);
+void	pcihinv(Pcidev*);
+uchar	pciipin(Pcidev*, uchar);
+Pcidev* pcimatch(Pcidev*, int, int);
+Pcidev* pcimatchtbdf(int);
+int	pcicap(Pcidev*, int);
+int	pcihtcap(Pcidev*, int);
+void	pcireset(void);
+int	pciscan(int, Pcidev**);
+void	pcisetbme(Pcidev*);
+void	pcisetioe(Pcidev*);
+void	pcisetmwi(Pcidev*);
+int	pcisetpms(Pcidev*, int);
+void	pcmcisread(PCMslot*);
+int	pcmcistuple(int, int, int, void*, int);
+PCMmap*	pcmmap(int, ulong, int, int);
+int	pcmspecial(char*, ISAConf*);
+int	(*_pcmspecial)(char *, ISAConf *);
+void	pcmspecialclose(int);
+void	(*_pcmspecialclose)(int);
+void	pcmunmap(int, PCMmap*);
+void	pmap(uintptr *, uintptr, uintptr, int);
+void	procrestore(Proc*);
+void	procsave(Proc*);
+void	procsetup(Proc*);
+void	procfork(Proc*);
+void	putcr0(u64int);
+void	putcr3(u64int);
+void	putcr4(u64int);
+void*	rampage(void);
+int	rdmsr(int, vlong*);
+void	realmode(Ureg*);
+void	screeninit(void);
+void	(*screenputs)(char*, int);
+void*	sigsearch(char*);
+void	syncclock(void);
+void	syscallentry(void);
+void	touser(void*);
+void	trapenable(int, void (*)(Ureg*, void*), void*, char*);
+void	trapinit(void);
+void	trapinit0(void);
+int	tas(void*);
+uvlong	tscticks(uvlong*);
+uintptr	umbmalloc(uintptr, int, int);
+void	umbfree(uintptr, int);
+uintptr	umbrwmalloc(uintptr, int, int);
+void	umbrwfree(uintptr, int);
+uintptr	upaalloc(int, int);
+void	upafree(uintptr, int);
+void	upareserve(uintptr, int);
+void	vectortable(void);
+void*	vmap(uintptr, int);
+int	vmapsync(uintptr);
+void	vunmap(void*, int);
+void	wbinvd(void);
+int	wrmsr(int, vlong);
+int	xchgw(ushort*, int);
+void	rdrandbuf(void*, ulong);
+
+#define	userureg(ur)	(((ur)->cs & 3) == 3)	/* true when the low two bits of the saved cs are both set */
+#define	waserror()	(up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1]))	/* claim the next errlab slot; value is setlabel's (0 in l.s, 1 when resumed through gotolabel) */
+#define	KADDR(a)	kaddr(a)	/* thin wrapper: void* kaddr(uintptr) */
+#define PADDR(a)	paddr((void*)(a))	/* casts so any pointer or integer expression is accepted */
--- /dev/null
+++ b/sys/src/9/pc64/l.s
@@ -1,0 +1,1050 @@
+#include "mem.h"
+
+MODE $32
+
+#define DELAY		BYTE $0xEB; BYTE $0x00	/* JMP .+2 */
+
+#define pFARJMP32(s, o)	BYTE $0xea;		/* far jump to ptr32:16 */\
+			LONG $o; WORD $s
+
+/*
+ * Enter here in 32-bit protected mode. Welcome to 1982.
+ * Make sure the GDT is set as it should be:
+ *	disable interrupts;
+ *	load the GDT with the table in _gdt32p;
+ *	load all the data segments
+ *	load the code segment via a far jump.
+ */
+TEXT _protected<>(SB), 1, $-4			/* 32-bit entry: install the boot GDT and reload every segment */
+	CLI
+
+	MOVL	$_gdtptr32p<>-KZERO(SB), AX	/* address of the descriptor, KZERO subtracted */
+	MOVL	(AX), GDTR
+
+	MOVL	$SELECTOR(2, SELGDT, 0), AX	/* entry 2 of _gdt is the 32-bit data segment */
+	MOVW	AX, DS
+	MOVW	AX, ES
+	MOVW	AX, FS
+	MOVW	AX, GS
+	MOVW	AX, SS
+
+	pFARJMP32(SELECTOR(3, SELGDT, 0), _warp64<>-KZERO(SB))	/* reload CS; entry 3 is the 32-bit exec segment */
+
+TEXT _gdt<>(SB), 1, $-4				/* boot GDT: 4 descriptors, two LONGs each */
+	/* null descriptor */
+	LONG	$0
+	LONG	$0
+
+	/* (KESEG) 64 bit long mode exec segment */
+	LONG	$(0xFFFF)
+	LONG	$(SEGL|SEGG|SEGP|(0xF<<16)|SEGPL(0)|SEGEXEC|SEGR)
+
+	/* 32 bit data segment descriptor for 4 gigabytes (PL 0) */
+	LONG	$(0xFFFF)
+	LONG	$(SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(0)|SEGDATA|SEGW)
+
+	/* 32 bit exec segment descriptor for 4 gigabytes (PL 0) */
+	LONG	$(0xFFFF)
+	LONG	$(SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(0)|SEGEXEC|SEGR)
+
+
+TEXT _gdtptr32p<>(SB), 1, $-4			/* loaded in _protected */
+	WORD	$(4*8-1)				/* limit: 4 descriptors of 8 bytes, less 1 */
+	LONG	$_gdt<>-KZERO(SB)			/* 32-bit base: _gdt with KZERO subtracted */
+
+TEXT _gdtptr64p<>(SB), 1, $-4			/* not referenced by the code visible here */
+	WORD	$(4*8-1)				/* limit: 4 descriptors of 8 bytes, less 1 */
+	QUAD	$_gdt<>-KZERO(SB)			/* 64-bit base: _gdt with KZERO subtracted */
+
+TEXT _gdtptr64v<>(SB), 1, $-4			/* loaded in _start64v */
+	WORD	$(4*8-1)				/* limit: 4 descriptors of 8 bytes, less 1 */
+	QUAD	$_gdt<>(SB)				/* 64-bit base: _gdt at its linked address */
+
+/*
+ * Macros for accessing page table entries; change the
+ * C-style array-index macros into a page table byte offset
+ */
+#define PML4O(v)	((PTLX((v), 3))<<3)
+#define PDPO(v)		((PTLX((v), 2))<<3)
+#define PDO(v)		((PTLX((v), 1))<<3)
+#define PTO(v)		((PTLX((v), 0))<<3)
+
+TEXT _warp64<>(SB), 1, $-4			/* build the initial page tables; falls through into _lme */
+
+	/* clear mach and page tables */
+	MOVL	$((CPU0END-CPU0PML4)>>2), CX	/* size of the region in 32-bit words */
+	MOVL	$(CPU0PML4-KZERO), SI		/* start of the region, KZERO subtracted; SI is kept for _lme */
+	MOVL	SI, DI				/* STOSL stores through DI */
+	XORL	AX, AX				/* fill value 0 */
+	CLD
+	REP;	STOSL
+
+	MOVL	SI, AX				/* PML4 */
+	MOVL	AX, DX
+	ADDL	$(PTSZ|PTEWRITE|PTEVALID), DX	/* PDP at PML4 + PTSZ */
+	MOVL	DX, PML4O(0)(AX)		/* PML4E for double-map */
+	MOVL	DX, PML4O(KZERO)(AX)		/* PML4E for KZERO */
+
+	ADDL	$PTSZ, AX			/* PDP at PML4 + PTSZ */
+	ADDL	$PTSZ, DX			/* PD at PML4 + 2*PTSZ */
+	MOVL	DX, PDPO(0)(AX)			/* PDPE for double-map */
+	MOVL	DX, PDPO(KZERO)(AX)		/* PDPE for KZERO */
+
+	ADDL	$PTSZ, AX			/* PD at PML4 + 2*PTSZ */
+	MOVL	$(PTESIZE|PTEGLOBAL|PTEWRITE|PTEVALID), DX	/* PDE flags with address bits 0 */
+	MOVL	DX, PDO(0)(AX)			/* PDE for double-map */
+
+	ADDL	$PDO(KZERO), AX			/* AX -> PDE slot for KZERO */
+memloop:
+	MOVL	DX, 0(AX)			/* low 32 bits of the entry; the high half is still 0 from the clear above */
+	ADDL	$PGLSZ(1), DX			/* advance the address by PGLSZ(1) */
+	ADDL	$8, AX				/* next 8-byte entry */
+	CMPL	DX, $INIMAP
+	JLT	memloop				/* repeat while DX is below INIMAP */
+
+/*
+ * Enable and activate Long Mode. From the manual:
+ * 	make sure Page Size Extensions are off, and Page Global
+ *	Extensions and Physical Address Extensions are on in CR4;
+ *	set Long Mode Enable in the Extended Feature Enable MSR;
+ *	set Paging Enable in CR0;
+ *	make an inter-segment jump to the Long Mode code.
+ * It's all in 32-bit mode until the jump is made.
+ */
+TEXT _lme<>(SB), 1, $-4				/* entered by falling through from _warp64 */
+	MOVL	SI, CR3				/* load the mmu; SI still holds the PML4 address set in _warp64 */
+	DELAY
+
+	MOVL	CR4, AX
+	ANDL	$~0x00000010, AX			/* Page Size */
+	ORL	$0x000000A0, AX			/* Page Global, Phys. Address */
+	MOVL	AX, CR4
+
+	MOVL	$0xc0000080, CX			/* Extended Feature Enable */
+	RDMSR
+	ORL	$0x00000100, AX			/* Long Mode Enable */
+	WRMSR
+
+	MOVL	CR0, DX
+	ANDL	$~0x6000000a, DX		/* clear Cache Disable, Not Write-through, Task Switched, Monitor Coprocessor */
+	ORL	$0x80010000, DX			/* Paging Enable, Write Protect */
+	MOVL	DX, CR0
+
+	pFARJMP32(SELECTOR(KESEG, SELGDT, 0), _identity<>-KZERO(SB))	/* far jump into the 64-bit code segment */
+
+/*
+ * Long mode. Welcome to 2003.
+ * Jump out of the identity map space;
+ * load a proper long mode GDT.
+ */
+MODE $64
+
+TEXT _identity<>(SB), 1, $-4			/* reached at its KZERO-subtracted address from _lme */
+	MOVQ	$_start64v<>(SB), AX		/* full linked address of _start64v */
+	JMP*	AX				/* indirect jump, so execution continues at the linked address */
+
+TEXT _start64v<>(SB), 1, $-4			/* 64-bit setup for cpu0, then main() */
+	MOVQ	$_gdtptr64v<>(SB), AX		/* descriptor whose base is _gdt without KZERO subtracted */
+	MOVL	(AX), GDTR
+
+	XORQ	AX, AX				/* AX stays 0 through the BSS clear and the flags load below */
+	MOVW	AX, DS				/* not used in long mode */
+	MOVW	AX, ES				/* not used in long mode */
+	MOVW	AX, FS
+	MOVW	AX, GS
+	MOVW	AX, SS				/* not used in long mode */
+
+	MOVW	AX, LDTR			/* null selector: no LDT */
+
+	MOVQ	$(CPU0MACH+MACHSIZE), SP	/* stack starts MACHSIZE bytes above CPU0MACH */
+	MOVQ	$(CPU0MACH), RMACH		/* m = CPU0MACH */
+	MOVQ	AX, RUSER			/* up = 0; */
+
+_clearbss:
+	MOVQ	$edata(SB), DI
+	MOVQ	$end(SB), CX
+	SUBQ	DI, CX				/* end-edata bytes */
+	SHRQ	$2, CX				/* end-edata doublewords */
+
+	CLD
+	REP;	STOSL				/* clear BSS */
+
+	PUSHQ	AX				/* clear flags */
+	POPFQ
+
+	CALL	main(SB)			/* nothing follows the call in this routine */
+
+/*
+ * The CPUID instruction is always supported on the amd64.
+ */
+TEXT cpuid(SB), $-4				/* void cpuid(int, ulong regs[]) in fns.h */
+	MOVL	RARG, AX			/* function in AX */
+	CPUID					/* NOTE(review): CX is not set first, so sub-leaf functions see whatever CX held */
+
+	MOVQ	info+8(FP), BP			/* second argument: where the four results go */
+	MOVL	AX, 0(BP)
+	MOVL	BX, 4(BP)
+	MOVL	CX, 8(BP)
+	MOVL	DX, 12(BP)
+	RET
+
+/*
+ * Port I/O.
+ */
+TEXT inb(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	XORL	AX, AX
+	INB
+	RET
+
+TEXT insb(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), DI
+	MOVL	count+16(FP), CX
+	CLD
+	REP;	INSB
+	RET
+
+TEXT ins(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	XORL	AX, AX
+	INW
+	RET
+
+TEXT inss(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), DI
+	MOVL	count+16(FP), CX
+	CLD
+	REP;	INSW
+	RET
+
+TEXT inl(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	INL
+	RET
+
+TEXT insl(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), DI
+	MOVL	count+16(FP), CX
+	CLD
+	REP; INSL
+	RET
+
+TEXT outb(SB), 1, $-1
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVL	byte+8(FP), AX
+	OUTB
+	RET
+
+TEXT outsb(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), SI
+	MOVL	count+16(FP), CX
+	CLD
+	REP; OUTSB
+	RET
+
+TEXT outs(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVL	short+8(FP), AX
+	OUTW
+	RET
+
+TEXT outss(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), SI
+	MOVL	count+16(FP), CX
+	CLD
+	REP; OUTSW
+	RET
+
+TEXT outl(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVL	long+8(FP), AX
+	OUTL
+	RET
+
+TEXT outsl(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), SI
+	MOVL	count+16(FP), CX
+	CLD
+	REP; OUTSL
+	RET
+
+TEXT getgdt(SB), 1, $-4
+	MOVQ	RARG, AX
+	MOVL	GDTR, (AX)			/* Note: 10 bytes returned */
+	RET
+
+TEXT lgdt(SB), $0				/* GDTR - global descriptor table */
+	MOVQ	RARG, AX
+	MOVL	(AX), GDTR
+	RET
+
+TEXT lidt(SB), $0				/* IDTR - interrupt descriptor table */
+	MOVQ	RARG, AX
+	MOVL	(AX), IDTR
+	RET
+
+TEXT ltr(SB), 1, $-4
+	MOVW	RARG, AX
+	MOVW	AX, TASK
+	RET
+
+/*
+ * Read/write various system registers.
+ */
+TEXT getcr0(SB), 1, $-4				/* Processor Control */
+	MOVQ	CR0, AX
+	RET
+
+TEXT putcr0(SB), 1, $-4
+	MOVQ	RARG, CR0
+	RET
+
+TEXT getcr2(SB), 1, $-4				/* #PF Linear Address */
+	MOVQ	CR2, AX
+	RET
+
+TEXT getcr3(SB), 1, $-4				/* PML4 Base */
+	MOVQ	CR3, AX
+	RET
+
+TEXT putcr3(SB), 1, $-4
+	MOVQ	RARG, CR3
+	RET
+
+TEXT getcr4(SB), 1, $-4				/* Extensions */
+	MOVQ	CR4, AX
+	RET
+
+TEXT putcr4(SB), 1, $-4
+	MOVQ	RARG, CR4
+	RET
+
+TEXT mb386(SB), 1, $-4				/* hack */
+TEXT mb586(SB), 1, $-4
+	XORL	AX, AX
+	CPUID
+	RET
+
+/*
+ * BIOS32.
+ */
+TEXT bios32call(SB), 1, $-1
+	XORL	AX, AX
+	INCL	AX
+	RET
+
+/*
+ * Basic timing loop to determine CPU frequency.
+ */
+TEXT aamloop(SB), 1, $-4
+	MOVL	RARG, CX
+_aamloop:
+	LOOP	_aamloop
+	RET
+
+TEXT _cycles(SB), 1, $-4			/* time stamp counter */
+	RDTSC
+	MOVL	AX, 0(RARG)			/* lo */
+	MOVL	DX, 4(RARG)			/* hi */
+	RET
+
+TEXT rdmsr(SB), 1, $-4				/* Model-Specific Register */
+	MOVL	RARG, CX			/* MSR number */
+	MOVQ	$0, BP				/* return value when RDMSR does not trap */
+TEXT _rdmsrinst(SB), $0				/* names the address of the RDMSR itself */
+	RDMSR
+	MOVQ	vlong+8(FP), CX			/* &vlong */
+	MOVL	AX, 0(CX)			/* lo */
+	MOVL	DX, 4(CX)			/* hi */
+	MOVQ	BP, AX				/* BP set to -1 if trapped */
+	RET
+	
+TEXT wrmsr(SB), 1, $-4				/* int wrmsr(int, vlong) in fns.h */
+	MOVL	RARG, CX			/* MSR number */
+	MOVL	lo+8(FP), AX			/* low half of the vlong argument */
+	MOVL	hi+12(FP), DX			/* high half of the vlong argument */
+	MOVQ	$0, BP				/* return value when WRMSR does not trap */
+TEXT _wrmsrinst(SB), $0				/* names the address of the WRMSR itself */
+	WRMSR
+	MOVQ	BP, AX				/* BP set to -1 if trapped */
+	RET
+
+TEXT invlpg(SB), 1, $-4				/* void invlpg(uintptr va): drop the TLB entry for va */
+	/* INVLPG flushes the page containing its memory operand, so the */
+	/* operand must be addressed through va itself, not a stack copy of it. */
+	INVLPG	(RARG)
+
+	RET
+
+TEXT wbinvd(SB), 1, $-4
+	WBINVD
+	RET
+
+/*
+ * Serialisation.
+ */
+TEXT lfence(SB), 1, $-4
+	LFENCE
+	RET
+
+TEXT mfence(SB), 1, $-4
+	MFENCE
+	RET
+
+TEXT sfence(SB), 1, $-4
+	SFENCE
+	RET
+
+/*
+ * Note: CLI and STI are not serialising instructions.
+ * Is that assumed anywhere?
+ */
+TEXT splhi(SB), 1, $-4
+_splhi:
+	PUSHFQ
+	POPQ	AX
+	TESTQ	$0x200, AX			/* 0x200 - Interrupt Flag */
+	JZ	_alreadyhi			/* use CMOVLEQ etc. here? */
+
+	MOVQ	(SP), BX
+	MOVQ	BX, 8(RMACH) 			/* save PC in m->splpc */
+
+_alreadyhi:
+	CLI
+	RET
+
+TEXT spllo(SB), 1, $-4
+_spllo:
+	PUSHFQ
+	POPQ	AX
+	TESTQ	$0x200, AX			/* 0x200 - Interrupt Flag */
+	JNZ	_alreadylo			/* use CMOVLEQ etc. here? */
+
+	MOVQ	$0, 8(RMACH)			/* clear m->splpc */
+
+_alreadylo:
+	STI
+	RET
+
+TEXT splx(SB), 1, $-4
+	TESTQ	$0x200, RARG			/* 0x200 - Interrupt Flag */
+	JNZ	_spllo
+	JMP	_splhi
+
+TEXT spldone(SB), 1, $-4
+	RET
+
+TEXT islo(SB), 1, $-4
+	PUSHFQ
+	POPQ	AX
+	ANDQ	$0x200, AX			/* 0x200 - Interrupt Flag */
+	RET
+
+/*
+ * Synchronisation
+ */
+TEXT ainc8(SB), 1, $-4
+	XORL	AX, AX
+	INCL	AX
+	LOCK;	XADDB AX, (RARG)
+/* BOTCH	INCL	AX */
+	RET
+
+TEXT _xinc(SB), 1, $-4				/* int _inc(long*); */
+	MOVL	$1, AX
+	LOCK; XADDL AX, (RARG)
+	ADDL	$1, AX				/* overflow if -ve or 0 */
+	JGT	_return
+_trap:
+	XORQ	BX, BX
+	MOVQ	(BX), BX			/* over under sideways down */
+_return:
+	RET
+
+TEXT _xdec(SB), 1, $-4				/* int _dec(long*); */
+	MOVL	$-1, AX
+	LOCK; XADDL AX, (RARG)
+	SUBL	$1, AX				/* underflow if -ve */
+	JLT	_trap
+	RET
+
+TEXT tas(SB), 1, $-4
+	MOVL	$0xdeaddead, AX
+	XCHGL	AX, (RARG)			/*  */
+	RET
+
+TEXT fas64(SB), 1, $-4
+	MOVQ	p+8(FP), AX
+	LOCK; XCHGQ	AX, (RARG)			/*  */
+	RET
+
+TEXT cmpswap486(SB), 1, $-4
+TEXT cas(SB), 1, $-4
+	MOVL	exp+8(FP), AX
+	MOVL	new+16(FP), BX
+	LOCK; CMPXCHGL BX, (RARG)
+	MOVL	$1, AX				/* use CMOVLEQ etc. here? */
+	JNZ	_cas32r0
+_cas32r1:
+	RET
+_cas32r0:
+	DECL	AX
+	RET
+
+TEXT cas64(SB), 1, $-4
+	MOVQ	exp+8(FP), AX
+	MOVQ	new+16(FP), BX
+	LOCK; CMPXCHGQ BX, (RARG)
+	MOVL	$1, AX				/* use CMOVLEQ etc. here? */
+	JNZ	_cas64r0
+_cas64r1:
+	RET
+_cas64r0:
+	DECL	AX
+	RET
+
+/*
+ * Label consists of a stack pointer and a programme counter
+ */
+TEXT gotolabel(SB), 1, $-4
+	MOVQ	0(RARG), SP			/* restore SP */
+	MOVQ	8(RARG), AX			/* put return PC on the stack */
+	MOVQ	AX, 0(SP)
+	MOVL	$1, AX				/* return 1 */
+	RET
+
+TEXT setlabel(SB), 1, $-4
+	MOVQ	SP, 0(RARG)			/* store SP */
+	MOVQ	0(SP), BX			/* store return PC */
+	MOVQ	BX, 8(RARG)
+	MOVL	$0, AX				/* return 0 */
+	RET
+
+TEXT idle(SB), $0
+_idle:
+	STI
+	HLT
+	JMP	_idle
+
+TEXT halt(SB), 1, $-4
+	HLT
+	RET
+
+/*
+ * SIMD Floating Point.
+ * Note: for x87 instructions which have both a 'wait' and 'nowait' version,
+ * 8a only knows the 'wait' mnemonic but does NOT insert the WAIT prefix byte
+ * (i.e. they act like their FNxxx variations) so WAIT instructions must be
+ * explicitly placed in the code if necessary.
+ */
+TEXT _clts(SB), 1, $-4
+	CLTS
+	RET
+
+TEXT _fldcw(SB), 1, $-4				/* Load x87 FPU Control Word */
+	MOVQ	RARG, cw+0(FP)
+	FLDCW	cw+0(FP)
+	RET
+
+TEXT _fnclex(SB), 1, $-4
+	FCLEX
+	RET
+
+TEXT _fninit(SB), 1, $-4
+	FINIT					/* no WAIT */
+	RET
+
+TEXT _fxrstor(SB), 1, $-4
+	FXRSTOR64 (RARG)
+	RET
+
+TEXT _fxsave(SB), 1, $-4
+	FXSAVE64 (RARG)
+	RET
+
+TEXT _fwait(SB), 1, $-4
+	WAIT
+	RET
+
+TEXT _ldmxcsr(SB), 1, $-4			/* Load MXCSR */
+	MOVQ	RARG, mxcsr+0(FP)
+	LDMXCSR	mxcsr+0(FP)
+	RET
+
+TEXT _stts(SB), 1, $-4
+	MOVQ	CR0, AX
+	ORQ	$8, AX				/* Ts */
+	MOVQ	AX, CR0
+	RET
+
+TEXT mul64fract(SB), 1, $-4
+	MOVQ	a+8(FP), AX
+	MULQ	b+16(FP)			/* a*b */
+	SHRQ	$32, AX:DX
+	MOVQ	AX, (RARG)
+	RET
+
+#define	RDRANDAX	BYTE $0x0f; BYTE $0xc7; BYTE $0xf0
+#define	RDRAND64AX	BYTE $0x48; BYTE $0x0f; BYTE $0xc7;  BYTE $0xf0
+
+TEXT rdrand32(SB), $-4
+loop32:
+	RDRANDAX
+	JCC		loop32
+	RET
+
+TEXT rdrand64(SB), $-4
+loop64:
+	RDRAND64AX
+	JCC		loop64
+	RET
+
+TEXT rdrandbuf(SB), $0				/* void rdrandbuf(void*, ulong): fill a buffer from RDRAND */
+	MOVQ	RARG, DX			/* DX walks the buffer; rdrand32/rdrand64 only touch AX */
+
+	MOVLQZX	cnt+8(FP), CX
+	SHRQ	$3, CX				/* number of whole 8-byte words */
+eights:
+	CMPL	CX, $0
+	JLE	f1
+	CALL	rdrand64(SB)
+	MOVQ	AX, 0(DX)
+	ADDQ	$8, DX
+	SUBL	$1, CX
+	JMP	eights
+
+f1:
+	MOVLQZX	cnt+8(FP), CX
+	ANDL	$7, CX
+	SHRQ	$2, CX				/* 1 if a 4-byte word is left over, else 0 */
+fours:
+	CMPL	CX, $0
+	JLE	f2
+	CALL	rdrand32(SB)
+	MOVL	AX, 0(DX)
+	ADDQ	$4, DX
+	SUBL	$1, CX
+	JMP	fours
+
+f2:
+	MOVLQZX	cnt+8(FP), CX
+	ANDL	$3, CX				/* 0 to 3 trailing bytes */
+ones:
+	CMPL	CX, $0
+	JLE	f3
+	CALL	rdrand32(SB)			/* a fresh value per byte; only the low byte is stored */
+	MOVB	AX, 0(DX)
+	ADDQ	$1, DX
+	SUBL	$1, CX
+	JMP	ones
+
+f3:
+	RET
+
+/*
+ */
+TEXT touser(SB), 1, $-4
+	CLI
+	SWAPGS
+	MOVQ	$UDSEL, AX
+	MOVW	AX, DS
+	MOVW	AX, ES
+	MOVW	AX, FS
+	MOVW	AX, GS
+
+	MOVQ	$(UTZERO+0x28), CX		/* ip */
+	MOVQ	$0x200, R11			/* flags */
+
+	MOVQ	RARG, SP			/* sp */
+
+	BYTE $0x48; SYSRET			/* SYSRETQ */
+
+/*
+ */
+TEXT syscallentry(SB), 1, $-4
+	SWAPGS
+	BYTE $0x65; MOVQ 0, RMACH		/* m-> (MOVQ GS:0x0, R15) */
+	MOVQ	16(RMACH), RUSER		/* m->proc */
+	MOVQ	SP, R13
+	MOVQ	16(RUSER), SP			/* m->proc->kstack */
+	ADDQ	$KSTACK, SP
+	PUSHQ	$UDSEL				/* old stack segment */
+	PUSHQ	R13				/* old sp */
+	PUSHQ	R11				/* old flags */
+	PUSHQ	$UESEL				/* old code segment */
+	PUSHQ	CX				/* old ip */
+
+	SUBQ	$(17*8), SP			/* unsaved registers */
+	PUSHQ	RARG				/* system call number */
+
+	MOVW	$UDSEL, (15*8+0)(SP)
+	MOVW	ES, (15*8+2)(SP)
+	MOVW	FS, (15*8+4)(SP)
+	MOVW	GS, (15*8+6)(SP)
+
+	MOVQ	SP, RARG
+	PUSHQ	SP				/* Ureg* */
+	CALL	syscall(SB)
+
+TEXT forkret(SB), 1, $-4
+	MOVQ	8(SP), AX			/* Ureg.ax */
+	MOVQ	(8+6*8)(SP), BP			/* Ureg.bp */
+	ADDQ	$(16*8), SP			/* registers + arguments */
+
+	CLI
+	SWAPGS
+	MOVW	0(SP), DS
+	MOVW	2(SP), ES
+	MOVW	4(SP), FS
+	MOVW	6(SP), GS
+
+	MOVQ	24(SP), CX			/* ip */
+	MOVQ	40(SP), R11			/* flags */
+
+	MOVQ	48(SP), SP			/* sp */
+
+	BYTE $0x48; SYSRET			/* SYSRETQ */
+
+/*
+ * Interrupt/exception handling.
+ */
+
+TEXT _strayintr(SB), 1, $-4			/* no error code pushed */
+	PUSHQ	AX				/* save AX */
+	MOVQ	8(SP), AX			/* vectortable(SB) PC */
+	JMP	_intrcommon
+
+TEXT _strayintrx(SB), 1, $-4			/* error code pushed */
+	XCHGQ	AX, (SP)			/* AX = vectortable PC, stack slot = saved AX */
+_intrcommon:
+	MOVBQZX	(AX), AX			/* the BYTE after the CALL in vectortable: the vector number */
+	XCHGQ	AX, (SP)			/* stack slot = vector number, AX restored */
+
+	SUBQ	$24, SP				/* R1[45], [DEFG]S */
+	CMPW	48(SP), $KESEL			/* old CS */
+	JEQ	_intrnested			/* came from the kernel: registers below are already the kernel's */
+
+	MOVQ	RUSER, 0(SP)
+	MOVQ	RMACH, 8(SP)
+	MOVW	DS, 16(SP)
+	MOVW	ES, 18(SP)
+	MOVW	FS, 20(SP)
+	MOVW	GS, 22(SP)
+
+	SWAPGS
+	BYTE $0x65; MOVQ 0, RMACH		/* m-> (MOVQ GS:0x0, R15) */
+	MOVQ	16(RMACH), RUSER		/* up */
+
+_intrnested:
+	PUSHQ	R13
+	PUSHQ	R12
+	PUSHQ	R11
+	PUSHQ	R10
+	PUSHQ	R9
+	PUSHQ	R8
+	PUSHQ	BP
+	PUSHQ	DI
+	PUSHQ	SI
+	PUSHQ	DX
+	PUSHQ	CX
+	PUSHQ	BX
+	PUSHQ	AX
+
+	MOVQ	SP, RARG			/* Ureg* for trap() */
+	PUSHQ	SP
+	CALL	trap(SB)
+
+	POPQ	AX				/* discard the pushed Ureg* */
+
+	POPQ	AX
+	POPQ	BX
+	POPQ	CX
+	POPQ	DX
+	POPQ	SI
+	POPQ	DI
+	POPQ	BP
+	POPQ	R8
+	POPQ	R9
+	POPQ	R10
+	POPQ	R11
+	POPQ	R12
+	POPQ	R13
+
+	CMPW	48(SP), $KESEL			/* old CS; 16-bit compare, same test as on entry so SWAPGS stays paired */
+	JEQ	_iretnested
+
+	SWAPGS
+	MOVW	22(SP), GS
+	MOVW	20(SP), FS
+	MOVW	18(SP), ES
+	MOVW	16(SP), DS
+	MOVQ	8(SP), RMACH
+	MOVQ	0(SP), RUSER
+
+_iretnested:
+	ADDQ	$40, SP				/* drop the 24-byte save area, the vector number and the error code */
+	IRETQ
+
+/*
+ * Interrupt vector stubs, one per vector 0x00-0xFF.
+ * Each entry is a CALL followed by a byte holding the vector number;
+ * the entry code reads that byte through the return PC pushed by the
+ * CALL.  Vectors marked "error code pushed" at _strayintrx (0x08,
+ * 0x0A-0x0E, 0x11) go there; all others go to _strayintr.
+ */
+TEXT vectortable(SB), $0
+	CALL _strayintr(SB); BYTE $0x00		/* divide error */
+	CALL _strayintr(SB); BYTE $0x01		/* debug exception */
+	CALL _strayintr(SB); BYTE $0x02		/* NMI interrupt */
+	CALL _strayintr(SB); BYTE $0x03		/* breakpoint */
+	CALL _strayintr(SB); BYTE $0x04		/* overflow */
+	CALL _strayintr(SB); BYTE $0x05		/* bound */
+	CALL _strayintr(SB); BYTE $0x06		/* invalid opcode */
+	CALL _strayintr(SB); BYTE $0x07		/* no coprocessor available */
+	CALL _strayintrx(SB); BYTE $0x08	/* double fault */
+	CALL _strayintr(SB); BYTE $0x09		/* coprocessor segment overflow */
+	CALL _strayintrx(SB); BYTE $0x0A	/* invalid TSS */
+	CALL _strayintrx(SB); BYTE $0x0B	/* segment not available */
+	CALL _strayintrx(SB); BYTE $0x0C	/* stack exception */
+	CALL _strayintrx(SB); BYTE $0x0D	/* general protection error */
+	CALL _strayintrx(SB); BYTE $0x0E	/* page fault */
+	CALL _strayintr(SB); BYTE $0x0F		/*  */
+	CALL _strayintr(SB); BYTE $0x10		/* coprocessor error */
+	CALL _strayintrx(SB); BYTE $0x11	/* alignment check */
+	CALL _strayintr(SB); BYTE $0x12		/* machine check */
+	CALL _strayintr(SB); BYTE $0x13
+	CALL _strayintr(SB); BYTE $0x14
+	CALL _strayintr(SB); BYTE $0x15
+	CALL _strayintr(SB); BYTE $0x16
+	CALL _strayintr(SB); BYTE $0x17
+	CALL _strayintr(SB); BYTE $0x18
+	CALL _strayintr(SB); BYTE $0x19
+	CALL _strayintr(SB); BYTE $0x1A
+	CALL _strayintr(SB); BYTE $0x1B
+	CALL _strayintr(SB); BYTE $0x1C
+	CALL _strayintr(SB); BYTE $0x1D
+	CALL _strayintr(SB); BYTE $0x1E
+	CALL _strayintr(SB); BYTE $0x1F
+	CALL _strayintr(SB); BYTE $0x20		/* VectorLAPIC */
+	CALL _strayintr(SB); BYTE $0x21
+	CALL _strayintr(SB); BYTE $0x22
+	CALL _strayintr(SB); BYTE $0x23
+	CALL _strayintr(SB); BYTE $0x24
+	CALL _strayintr(SB); BYTE $0x25
+	CALL _strayintr(SB); BYTE $0x26
+	CALL _strayintr(SB); BYTE $0x27
+	CALL _strayintr(SB); BYTE $0x28
+	CALL _strayintr(SB); BYTE $0x29
+	CALL _strayintr(SB); BYTE $0x2A
+	CALL _strayintr(SB); BYTE $0x2B
+	CALL _strayintr(SB); BYTE $0x2C
+	CALL _strayintr(SB); BYTE $0x2D
+	CALL _strayintr(SB); BYTE $0x2E
+	CALL _strayintr(SB); BYTE $0x2F
+	CALL _strayintr(SB); BYTE $0x30
+	CALL _strayintr(SB); BYTE $0x31
+	CALL _strayintr(SB); BYTE $0x32
+	CALL _strayintr(SB); BYTE $0x33
+	CALL _strayintr(SB); BYTE $0x34
+	CALL _strayintr(SB); BYTE $0x35
+	CALL _strayintr(SB); BYTE $0x36
+	CALL _strayintr(SB); BYTE $0x37
+	CALL _strayintr(SB); BYTE $0x38
+	CALL _strayintr(SB); BYTE $0x39
+	CALL _strayintr(SB); BYTE $0x3A
+	CALL _strayintr(SB); BYTE $0x3B
+	CALL _strayintr(SB); BYTE $0x3C
+	CALL _strayintr(SB); BYTE $0x3D
+	CALL _strayintr(SB); BYTE $0x3E
+	CALL _strayintr(SB); BYTE $0x3F
+	CALL _strayintr(SB); BYTE $0x40		/* was VectorSYSCALL */
+	CALL _strayintr(SB); BYTE $0x41
+	CALL _strayintr(SB); BYTE $0x42
+	CALL _strayintr(SB); BYTE $0x43
+	CALL _strayintr(SB); BYTE $0x44
+	CALL _strayintr(SB); BYTE $0x45
+	CALL _strayintr(SB); BYTE $0x46
+	CALL _strayintr(SB); BYTE $0x47
+	CALL _strayintr(SB); BYTE $0x48
+	CALL _strayintr(SB); BYTE $0x49
+	CALL _strayintr(SB); BYTE $0x4A
+	CALL _strayintr(SB); BYTE $0x4B
+	CALL _strayintr(SB); BYTE $0x4C
+	CALL _strayintr(SB); BYTE $0x4D
+	CALL _strayintr(SB); BYTE $0x4E
+	CALL _strayintr(SB); BYTE $0x4F
+	CALL _strayintr(SB); BYTE $0x50
+	CALL _strayintr(SB); BYTE $0x51
+	CALL _strayintr(SB); BYTE $0x52
+	CALL _strayintr(SB); BYTE $0x53
+	CALL _strayintr(SB); BYTE $0x54
+	CALL _strayintr(SB); BYTE $0x55
+	CALL _strayintr(SB); BYTE $0x56
+	CALL _strayintr(SB); BYTE $0x57
+	CALL _strayintr(SB); BYTE $0x58
+	CALL _strayintr(SB); BYTE $0x59
+	CALL _strayintr(SB); BYTE $0x5A
+	CALL _strayintr(SB); BYTE $0x5B
+	CALL _strayintr(SB); BYTE $0x5C
+	CALL _strayintr(SB); BYTE $0x5D
+	CALL _strayintr(SB); BYTE $0x5E
+	CALL _strayintr(SB); BYTE $0x5F
+	CALL _strayintr(SB); BYTE $0x60
+	CALL _strayintr(SB); BYTE $0x61
+	CALL _strayintr(SB); BYTE $0x62
+	CALL _strayintr(SB); BYTE $0x63
+	CALL _strayintr(SB); BYTE $0x64
+	CALL _strayintr(SB); BYTE $0x65
+	CALL _strayintr(SB); BYTE $0x66
+	CALL _strayintr(SB); BYTE $0x67
+	CALL _strayintr(SB); BYTE $0x68
+	CALL _strayintr(SB); BYTE $0x69
+	CALL _strayintr(SB); BYTE $0x6A
+	CALL _strayintr(SB); BYTE $0x6B
+	CALL _strayintr(SB); BYTE $0x6C
+	CALL _strayintr(SB); BYTE $0x6D
+	CALL _strayintr(SB); BYTE $0x6E
+	CALL _strayintr(SB); BYTE $0x6F
+	CALL _strayintr(SB); BYTE $0x70
+	CALL _strayintr(SB); BYTE $0x71
+	CALL _strayintr(SB); BYTE $0x72
+	CALL _strayintr(SB); BYTE $0x73
+	CALL _strayintr(SB); BYTE $0x74
+	CALL _strayintr(SB); BYTE $0x75
+	CALL _strayintr(SB); BYTE $0x76
+	CALL _strayintr(SB); BYTE $0x77
+	CALL _strayintr(SB); BYTE $0x78
+	CALL _strayintr(SB); BYTE $0x79
+	CALL _strayintr(SB); BYTE $0x7A
+	CALL _strayintr(SB); BYTE $0x7B
+	CALL _strayintr(SB); BYTE $0x7C
+	CALL _strayintr(SB); BYTE $0x7D
+	CALL _strayintr(SB); BYTE $0x7E
+	CALL _strayintr(SB); BYTE $0x7F
+	CALL _strayintr(SB); BYTE $0x80		/* Vector[A]PIC */
+	CALL _strayintr(SB); BYTE $0x81
+	CALL _strayintr(SB); BYTE $0x82
+	CALL _strayintr(SB); BYTE $0x83
+	CALL _strayintr(SB); BYTE $0x84
+	CALL _strayintr(SB); BYTE $0x85
+	CALL _strayintr(SB); BYTE $0x86
+	CALL _strayintr(SB); BYTE $0x87
+	CALL _strayintr(SB); BYTE $0x88
+	CALL _strayintr(SB); BYTE $0x89
+	CALL _strayintr(SB); BYTE $0x8A
+	CALL _strayintr(SB); BYTE $0x8B
+	CALL _strayintr(SB); BYTE $0x8C
+	CALL _strayintr(SB); BYTE $0x8D
+	CALL _strayintr(SB); BYTE $0x8E
+	CALL _strayintr(SB); BYTE $0x8F
+	CALL _strayintr(SB); BYTE $0x90
+	CALL _strayintr(SB); BYTE $0x91
+	CALL _strayintr(SB); BYTE $0x92
+	CALL _strayintr(SB); BYTE $0x93
+	CALL _strayintr(SB); BYTE $0x94
+	CALL _strayintr(SB); BYTE $0x95
+	CALL _strayintr(SB); BYTE $0x96
+	CALL _strayintr(SB); BYTE $0x97
+	CALL _strayintr(SB); BYTE $0x98
+	CALL _strayintr(SB); BYTE $0x99
+	CALL _strayintr(SB); BYTE $0x9A
+	CALL _strayintr(SB); BYTE $0x9B
+	CALL _strayintr(SB); BYTE $0x9C
+	CALL _strayintr(SB); BYTE $0x9D
+	CALL _strayintr(SB); BYTE $0x9E
+	CALL _strayintr(SB); BYTE $0x9F
+	CALL _strayintr(SB); BYTE $0xA0
+	CALL _strayintr(SB); BYTE $0xA1
+	CALL _strayintr(SB); BYTE $0xA2
+	CALL _strayintr(SB); BYTE $0xA3
+	CALL _strayintr(SB); BYTE $0xA4
+	CALL _strayintr(SB); BYTE $0xA5
+	CALL _strayintr(SB); BYTE $0xA6
+	CALL _strayintr(SB); BYTE $0xA7
+	CALL _strayintr(SB); BYTE $0xA8
+	CALL _strayintr(SB); BYTE $0xA9
+	CALL _strayintr(SB); BYTE $0xAA
+	CALL _strayintr(SB); BYTE $0xAB
+	CALL _strayintr(SB); BYTE $0xAC
+	CALL _strayintr(SB); BYTE $0xAD
+	CALL _strayintr(SB); BYTE $0xAE
+	CALL _strayintr(SB); BYTE $0xAF
+	CALL _strayintr(SB); BYTE $0xB0
+	CALL _strayintr(SB); BYTE $0xB1
+	CALL _strayintr(SB); BYTE $0xB2
+	CALL _strayintr(SB); BYTE $0xB3
+	CALL _strayintr(SB); BYTE $0xB4
+	CALL _strayintr(SB); BYTE $0xB5
+	CALL _strayintr(SB); BYTE $0xB6
+	CALL _strayintr(SB); BYTE $0xB7
+	CALL _strayintr(SB); BYTE $0xB8
+	CALL _strayintr(SB); BYTE $0xB9
+	CALL _strayintr(SB); BYTE $0xBA
+	CALL _strayintr(SB); BYTE $0xBB
+	CALL _strayintr(SB); BYTE $0xBC
+	CALL _strayintr(SB); BYTE $0xBD
+	CALL _strayintr(SB); BYTE $0xBE
+	CALL _strayintr(SB); BYTE $0xBF
+	CALL _strayintr(SB); BYTE $0xC0
+	CALL _strayintr(SB); BYTE $0xC1
+	CALL _strayintr(SB); BYTE $0xC2
+	CALL _strayintr(SB); BYTE $0xC3
+	CALL _strayintr(SB); BYTE $0xC4
+	CALL _strayintr(SB); BYTE $0xC5
+	CALL _strayintr(SB); BYTE $0xC6
+	CALL _strayintr(SB); BYTE $0xC7
+	CALL _strayintr(SB); BYTE $0xC8
+	CALL _strayintr(SB); BYTE $0xC9
+	CALL _strayintr(SB); BYTE $0xCA
+	CALL _strayintr(SB); BYTE $0xCB
+	CALL _strayintr(SB); BYTE $0xCC
+	CALL _strayintr(SB); BYTE $0xCD
+	CALL _strayintr(SB); BYTE $0xCE
+	CALL _strayintr(SB); BYTE $0xCF
+	CALL _strayintr(SB); BYTE $0xD0
+	CALL _strayintr(SB); BYTE $0xD1
+	CALL _strayintr(SB); BYTE $0xD2
+	CALL _strayintr(SB); BYTE $0xD3
+	CALL _strayintr(SB); BYTE $0xD4
+	CALL _strayintr(SB); BYTE $0xD5
+	CALL _strayintr(SB); BYTE $0xD6
+	CALL _strayintr(SB); BYTE $0xD7
+	CALL _strayintr(SB); BYTE $0xD8
+	CALL _strayintr(SB); BYTE $0xD9
+	CALL _strayintr(SB); BYTE $0xDA
+	CALL _strayintr(SB); BYTE $0xDB
+	CALL _strayintr(SB); BYTE $0xDC
+	CALL _strayintr(SB); BYTE $0xDD
+	CALL _strayintr(SB); BYTE $0xDE
+	CALL _strayintr(SB); BYTE $0xDF
+	CALL _strayintr(SB); BYTE $0xE0
+	CALL _strayintr(SB); BYTE $0xE1
+	CALL _strayintr(SB); BYTE $0xE2
+	CALL _strayintr(SB); BYTE $0xE3
+	CALL _strayintr(SB); BYTE $0xE4
+	CALL _strayintr(SB); BYTE $0xE5
+	CALL _strayintr(SB); BYTE $0xE6
+	CALL _strayintr(SB); BYTE $0xE7
+	CALL _strayintr(SB); BYTE $0xE8
+	CALL _strayintr(SB); BYTE $0xE9
+	CALL _strayintr(SB); BYTE $0xEA
+	CALL _strayintr(SB); BYTE $0xEB
+	CALL _strayintr(SB); BYTE $0xEC
+	CALL _strayintr(SB); BYTE $0xED
+	CALL _strayintr(SB); BYTE $0xEE
+	CALL _strayintr(SB); BYTE $0xEF
+	CALL _strayintr(SB); BYTE $0xF0
+	CALL _strayintr(SB); BYTE $0xF1
+	CALL _strayintr(SB); BYTE $0xF2
+	CALL _strayintr(SB); BYTE $0xF3
+	CALL _strayintr(SB); BYTE $0xF4
+	CALL _strayintr(SB); BYTE $0xF5
+	CALL _strayintr(SB); BYTE $0xF6
+	CALL _strayintr(SB); BYTE $0xF7
+	CALL _strayintr(SB); BYTE $0xF8
+	CALL _strayintr(SB); BYTE $0xF9
+	CALL _strayintr(SB); BYTE $0xFA
+	CALL _strayintr(SB); BYTE $0xFB
+	CALL _strayintr(SB); BYTE $0xFC
+	CALL _strayintr(SB); BYTE $0xFD
+	CALL _strayintr(SB); BYTE $0xFE
+	CALL _strayintr(SB); BYTE $0xFF
--- /dev/null
+++ b/sys/src/9/pc64/main.c
@@ -1,0 +1,742 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"tos.h"
+#include	"ureg.h"
+#include	"init.h"
+#include	"pool.h"
+
+/*
+ * Where configuration info is left for the loaded programme.
+ * This will turn into a structure as more is done by the boot loader
+ * (e.g. why parse the .ini file twice?).
+ * There are 3584 bytes available at CONFADDR.
+ */
+#define BOOTLINE	((char*)CONFADDR)		/* boot line left by the loader */
+#define BOOTLINELEN	64				/* bytes reserved for BOOTLINE */
+#define BOOTARGS	((char*)(CONFADDR+BOOTLINELEN))	/* configuration text follows the boot line */
+#define	BOOTARGSLEN	(4096-0x200-BOOTLINELEN)	/* bytes available for BOOTARGS */
+#define	MAXCONF		64				/* max name=value pairs kept */
+
+Conf conf;
+char *confname[MAXCONF];	/* names parsed by options(); point into BOOTARGS */
+char *confval[MAXCONF];		/* values parallel to confname[] */
+int nconf;			/* number of valid confname/confval entries */
+int delaylink;			/* when set, main() calls bootlinks() instead of links() */
+uchar *sp;	/* user stack of init proc */
+
+extern void (*i8237alloc)(void);	/* optional hook; main() calls it if non-nil */
+
+/*
+ * Parse the name=value configuration text at BOOTARGS into
+ * confname[]/confval[].  The text is edited in place: it is
+ * NUL-terminated at its maximum length, carriage returns are
+ * dropped, tabs become spaces and each '=' is replaced by a NUL.
+ * Lines starting with '#' and lines without '=' are ignored.
+ */
+static void
+options(void)
+{
+	long i, nline;
+	char *args, *eq, *line[MAXCONF], *src, *dst;
+
+	// multibootargs();
+
+	/*
+	 *  the loader (b.com) leaves the contents of plan9.ini here
+	 */
+	args = BOOTARGS;
+	args[BOOTARGSLEN-1] = 0;
+
+	/*
+	 * Compact the text: skip '\r', turn '\t' into ' '.
+	 */
+	dst = args;
+	for(src = args; *src != 0; src++){
+		if(*src != '\r'){
+			if(*src == '\t')
+				*src = ' ';
+			*dst++ = *src;
+		}
+	}
+	*dst = 0;
+
+	/*
+	 * One setting per line; split each at its first '='.
+	 */
+	nline = getfields(args, line, MAXCONF, 1, "\n");
+	for(i = 0; i < nline; i++){
+		if(*line[i] != '#' && (eq = strchr(line[i], '=')) != nil){
+			*eq = '\0';
+			confname[nconf] = line[i];
+			confval[nconf] = eq+1;
+			nconf++;
+		}
+	}
+}
+
+/*
+ * Look up a configuration variable by name, ignoring case.
+ * Returns the value string of the first match, or nil if there is none.
+ */
+char*
+getconf(char *name)
+{
+	int i;
+
+	i = 0;
+	while(i < nconf){
+		if(cistrcmp(confname[i], name) == 0)
+			return confval[i];
+		i++;
+	}
+	return nil;
+}
+
+/*
+ * Fill in the sizing fields of conf (npage, nproc, nimage, nswap,
+ * nswppo, upages, ialloc) from the memory banks in conf.mem[] and
+ * set the limits of the mainmem and imagmem pools.
+ * Honours the "*kernelpercent" and "*imagemaxmb" configuration
+ * variables.
+ */
+void
+confinit(void)
+{
+	char *p;
+	int i, userpcnt;
+	ulong kpages;
+
+	/* userpcnt == 0 means "not configured"; defaults are picked below */
+	if(p = getconf("*kernelpercent"))
+		userpcnt = 100 - strtol(p, 0, 0);
+	else
+		userpcnt = 0;
+
+	/* total pages over all memory banks */
+	conf.npage = 0;
+	for(i=0; i<nelem(conf.mem); i++)
+		conf.npage += conf.mem[i].npage;
+
+	/* 100 processes plus 5 per MB, tripled on cpu servers, capped at 2000 */
+	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
+	if(cpuserver)
+		conf.nproc *= 3;
+	if(conf.nproc > 2000)
+		conf.nproc = 2000;
+	conf.nimage = 200;
+	conf.nswap = conf.nproc*80;
+	conf.nswppo = 4096;
+
+	if(cpuserver) {
+		/* values below 10 (including "not configured") become 70% user */
+		if(userpcnt < 10)
+			userpcnt = 70;
+		kpages = conf.npage - (conf.npage*userpcnt)/100;
+
+		/*
+		 * Hack for the big boys. Only good while physmem < 4GB.
+		 * Give the kernel fixed max + enough to allocate the
+		 * page pool.
+		 * This is an overestimate as conf.upages < conf.npages.
+		 * The patch of nimage is a band-aid, scanning the whole
+		 * page list in imagereclaim just takes too long.
+		 */
+		if(getconf("*imagemaxmb") == 0)
+		if(kpages > (64*MB + conf.npage*sizeof(Page))/BY2PG){
+			kpages = (64*MB + conf.npage*sizeof(Page))/BY2PG;
+			conf.nimage = 2000;
+			kpages += (conf.nproc*KSTACK)/BY2PG;
+		}
+	} else {
+		/* terminals: 50% user below 16MB of memory, otherwise 60% */
+		if(userpcnt < 10) {
+			if(conf.npage*BY2PG < 16*MB)
+				userpcnt = 50;
+			else
+				userpcnt = 60;
+		}
+		kpages = conf.npage - (conf.npage*userpcnt)/100;
+
+		/*
+		 * Make sure terminals with low memory get at least
+		 * 4MB on the first Image chunk allocation.
+		 */
+		if(conf.npage*BY2PG < 16*MB)
+			imagmem->minarena = 4*MB;
+	}
+
+	/*
+	 * can't go past the end of virtual memory.
+	 */
+	if(kpages > ((uintptr)-KZERO)/BY2PG)
+		kpages = ((uintptr)-KZERO)/BY2PG;
+
+	conf.upages = conf.npage - kpages;
+	conf.ialloc = (kpages/2)*BY2PG;
+
+	/*
+	 * Guess how much is taken by the large permanent
+	 * datastructures. Mntcache and Mntrpc are not accounted for
+	 * (probably ~300KB).
+	 */
+	kpages *= BY2PG;		/* from here on kpages counts bytes, not pages */
+	kpages -= conf.upages*sizeof(Page)
+		+ conf.nproc*sizeof(Proc)
+		+ conf.nimage*sizeof(Image)
+		+ conf.nswap
+		+ conf.nswppo*sizeof(Page*);
+	mainmem->maxsize = kpages;
+
+	/*
+	 * the dynamic allocation will balance the load properly,
+	 * hopefully. be careful with 32-bit overflow.
+	 */
+	imagmem->maxsize = kpages - (kpages/10);
+	if(p = getconf("*imagemaxmb")){
+		/* explicit limit in MB, never larger than the main pool */
+		imagmem->maxsize = strtol(p, nil, 0)*MB;
+		if(imagmem->maxsize > mainmem->maxsize)
+			imagmem->maxsize = mainmem->maxsize;
+	}
+}
+
+
+/*
+ * Reset the current processor's Mach structure.
+ * Everything is zeroed except machno, pml4 and gdt, which are
+ * carried across the wipe; perf.period and loopconst then get
+ * their start-up values.
+ */
+void
+machinit(void)
+{
+	int no;
+	uintptr *savedpml4;
+	Segdesc *savedgdt;
+
+	/* remember what must survive the memset */
+	no = m->machno;
+	savedpml4 = m->pml4;
+	savedgdt = m->gdt;
+
+	memset(m, 0, sizeof(Mach));
+
+	m->machno = no;
+	m->pml4 = savedpml4;
+	m->gdt = savedgdt;
+	m->perf.period = 1;
+
+	/*
+	 * Polled uart output at boot needs some delay constant
+	 * before the real one is known; 100000 should do for a
+	 * while.  Cpuidentify calculates the real value later.
+	 */
+	m->loopconst = 100000;
+}
+
+/*
+ * Set up the Mach structure of the bootstrap processor.
+ * Records one processor in conf, points MACHP(0) at CPU0MACH,
+ * seeds the three fields machinit() preserves (machno, pml4, gdt)
+ * and then lets machinit() clear the rest.  Finally marks
+ * processor 0 as the only active one.
+ */
+void
+mach0init(void)
+{
+	conf.nmach = 1;
+
+	MACHP(0) = (Mach*)CPU0MACH;
+
+	/* these must be set before machinit(), which saves and restores them */
+	m->machno = 0;
+	m->pml4 = (u64int*)CPU0PML4;
+	m->gdt = (Segdesc*)CPU0GDT;
+
+	machinit();
+
+	active.machs = 1;
+	active.exiting = 0;
+}
+
+
+/*
+ * Copy the string p, including its terminating NUL, onto the
+ * downward-growing init stack at sp.  Returns the new sp, which is
+ * also the address of the copy.
+ */
+uchar *
+pusharg(char *p)
+{
+	int len;
+
+	len = strlen(p)+1;	/* +1 for the NUL */
+	sp = sp - len;
+	memmove(sp, p, len);
+	return sp;
+}
+
+/*
+ * Build the argument vector of the first user process on its stack.
+ * base is the kernel mapping of the top user stack page; the page
+ * appears in the user address space at USTKTOP-BY2PG.
+ * Strings are pushed below the Tos structure at the end of the page,
+ * followed by a nil-terminated array of user-space pointers to them.
+ * On return the global sp holds the user-space stack pointer,
+ * one word below that array.
+ */
+void
+bootargs(void *base)
+{
+	int i, ac;
+	uchar *av[32];
+	uchar **lsp;
+	char *cp = BOOTLINE;
+	char buf[64];
+
+	sp = (uchar*)base + BY2PG - sizeof(Tos);
+
+	ac = 0;
+	av[ac++] = pusharg("boot");
+
+	/* when boot is changed to only use rc, this code can go away */
+	cp[BOOTLINELEN-1] = 0;
+	buf[0] = 0;
+	/* bounded formatting: buf is a fixed-size stack buffer */
+	if(strncmp(cp, "fd", 2) == 0){
+		snprint(buf, sizeof buf, "local!#f/fd%lddisk", strtol(cp+2, 0, 0));
+		av[ac++] = pusharg(buf);
+	} else if(strncmp(cp, "sd", 2) == 0){
+		snprint(buf, sizeof buf, "local!#S/sd%c%c/fs", *(cp+2), *(cp+3));
+		av[ac++] = pusharg(buf);
+	} else if(strncmp(cp, "ether", 5) == 0)
+		av[ac++] = pusharg("-n");
+
+	/* 8 byte word align stack */
+	sp = (uchar*)((uintptr)sp & ~7);
+
+	/* build argc, argv on stack */
+	sp -= (ac+1)*sizeof(sp);
+	lsp = (uchar**)sp;
+	/* av[] holds kernel addresses; rebase them to the user mapping */
+	for(i = 0; i < ac; i++)
+		lsp[i] = av[i] + ((uintptr)(USTKTOP - BY2PG) - (uintptr)base);
+	lsp[i] = 0;
+	sp += (uintptr)(USTKTOP - BY2PG) - (uintptr)base;
+	sp -= BY2WD;
+}
+
+/*
+ * Kernel-side start of the first process (userinit() sets
+ * sched.pc to this function).  Enables interrupts, gives the process
+ * a root and current directory, initialises the devices, publishes
+ * the environment (terminal, cputype, service and the boot
+ * configuration variables), starts the alarm kproc and finally
+ * enters user mode on the stack prepared by bootargs().
+ */
+void
+init0(void)
+{
+	int i;
+	char buf[2*KNAMELEN];
+
+	up->nerrlab = 0;
+
+	spllo();
+
+	/*
+	 * These are o.k. because rootinit is null.
+	 * Then early kproc's will have a root and dot.
+	 */
+	up->slash = namec("#/", Atodir, 0, 0);
+	pathclose(up->slash->path);
+	up->slash->path = newpath("/");
+	up->dot = cclone(up->slash);
+
+	chandevinit();
+
+	if(!waserror()){
+		snprint(buf, sizeof(buf), "%s %s", arch->id, conffile);
+		ksetenv("terminal", buf, 0);
+		ksetenv("cputype", "amd64", 0);
+		if(cpuserver)
+			ksetenv("service", "cpu", 0);
+		else
+			ksetenv("service", "terminal", 0);
+		/*
+		 * Every setting is stored with last argument 1; those not
+		 * starting with '*' are stored with 0 as well.
+		 * NOTE(review): the flag presumably selects between the
+		 * two environment devices -- confirm against ksetenv.
+		 */
+		for(i = 0; i < nconf; i++){
+			if(confname[i][0] != '*')
+				ksetenv(confname[i], confval[i], 0);
+			ksetenv(confname[i], confval[i], 1);
+		}
+		poperror();
+	}
+	kproc("alarm", alarmkproc, 0);
+
+	touser(sp);
+}
+
+/*
+ * Create the first process by hand: allocate it, give it fresh
+ * name space, environment, fd and rendezvous groups, point its
+ * scheduling label at init0(), build a one-page user stack holding
+ * the boot arguments and a one-page text segment holding initcode,
+ * then make it ready to run.
+ */
+void
+userinit(void)
+{
+	void *v;
+	Proc *p;
+	Segment *s;
+	Page *pg;
+
+	p = newproc();
+	p->pgrp = newpgrp();
+	p->egrp = smalloc(sizeof(Egrp));
+	p->egrp->ref = 1;
+	p->fgrp = dupfgrp(nil);
+	p->rgrp = newrgrp();
+	p->procmode = 0640;
+
+	kstrdup(&eve, "");
+	kstrdup(&p->text, "*init*");
+	kstrdup(&p->user, eve);
+
+	procsetup(p);
+
+	/*
+	 * Kernel Stack
+	 *
+	 * N.B. make sure there's enough space for syscall to check
+	 *	for valid args and
+	 *	8 bytes for gotolabel's return PC
+	 */
+	p->sched.pc = (uintptr)init0;
+	p->sched.sp = (uintptr)p->kstack+KSTACK-(sizeof(Sargs)+BY2WD);
+
+	/*
+	 * User Stack
+	 */
+	s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG);
+	p->seg[SSEG] = s;
+	pg = newpage(0, 0, USTKTOP-BY2PG);	/* top page of the stack segment */
+	v = kmap(pg);
+	memset(v, 0, BY2PG);
+	segpage(s, pg);
+	bootargs(v);				/* fills the page and sets the global sp */
+	kunmap(v);
+
+	/*
+	 * Text
+	 */
+	s = newseg(SG_TEXT, UTZERO, 1);
+	s->flushme++;
+	p->seg[TSEG] = s;
+	pg = newpage(0, 0, UTZERO);
+	memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));
+	segpage(s, pg);
+	v = kmap(pg);
+	memset(v, 0, BY2PG);
+	memmove(v, initcode, sizeof initcode);	/* the page is zero beyond initcode */
+	kunmap(v);
+
+	ready(p);
+}
+
+/*
+ * Kernel entry point in C for the bootstrap processor.
+ * Runs the initialisation steps in a fixed order -- Mach structure,
+ * boot configuration, console and traps, cpu identification, memory
+ * sizing, allocators, interrupts, timers, floating point, devices,
+ * paging -- then creates the first process and enters the scheduler.
+ * schedinit() is the last call; nothing follows it.
+ */
+void
+main()
+{
+	mach0init();
+	options();
+	ioinit();
+	// i8250console();
+	quotefmtinstall();
+	screeninit();
+	trapinit0();
+	kbdinit();
+	i8253init();
+	cpuidentify();
+	meminit();
+	confinit();		/* needs conf.mem[] filled in */
+	archinit();
+	xinit();
+	if(i8237alloc != nil)
+		i8237alloc();
+	trapinit();
+	printinit();
+	cpuidprint();
+	mmuinit();
+	if(arch->intrinit)
+		arch->intrinit();
+	timersinit();
+	mathinit();
+	kbdenable();
+	if(arch->clockenable)
+		arch->clockenable();
+	procinit0();
+	initseg();
+	if(delaylink){
+		bootlinks();
+		pcimatch(0, 0, 0);
+	}else
+		links();
+	conf.monitor = 1;
+	chandevreset();
+	pageinit();
+	swapinit();
+	userinit();
+	active.thunderbirdsarego = 1;
+	schedinit();
+}
+
+/*
+ * Stop the machine: print "exit", raise the interrupt priority
+ * with splhi() and spin forever.  The argument is ignored.
+ */
+void
+exit(int)
+{
+	print("exit\n");
+	splhi();
+	for(;;)
+		;
+}
+
+/*
+ * Rebooting into a new kernel is not implemented here: all three
+ * arguments are ignored and the call ends in exit(0).
+ */
+void
+reboot(void*, void*, ulong)
+{
+	exit(0);
+}
+
+/*
+ * Called when there is nothing to run; simply calls halt().
+ */
+void
+idlehands(void)
+{
+	halt();
+}
+
+/*
+ * SIMD Floating Point.
+ * Assembler support to get at the individual instructions
+ * is in l.s.
+ * There are opportunities to be lazier about saving and
+ * restoring the state and allocating the storage needed.
+ */
+extern void _clts(void);
+extern void _fldcw(u16int);
+extern void _fnclex(void);
+extern void _fninit(void);
+extern void _fxrstor(Fxsave*);
+extern void _fxsave(Fxsave*);
+extern void _fwait(void);
+extern void _ldmxcsr(u32int);
+extern void _stts(void);
+
+/*
+ * not used, AMD64 mandated SSE
+ *
+ * Both functions are empty and ignore their argument.
+ */
+void
+fpx87save(FPsave*)
+{
+}
+void
+fpx87restore(FPsave*)
+{
+}
+
+/*
+ * Save the floating point state into *fps.
+ * The state is written at fps rounded up to FPalign, _stts() is
+ * called, and if rounding moved the address the saved image is
+ * copied down to the start of *fps.
+ */
+void
+fpssesave(FPsave *fps)
+{
+	Fxsave *aligned;
+
+	aligned = (Fxsave*)ROUND(((uintptr)fps), FPalign);
+	_fxsave(aligned);
+	_stts();
+	if((Fxsave*)fps != aligned)
+		memmove((Fxsave*)fps, aligned, sizeof(Fxsave));
+}
+/*
+ * Reload the floating point state from *fps.
+ * If fps is not FPalign-aligned the image is first moved up to the
+ * rounded address; then _clts() is called and the state is restored
+ * from there.
+ */
+void
+fpsserestore(FPsave *fps)
+{
+	Fxsave *aligned;
+
+	aligned = (Fxsave*)ROUND(((uintptr)fps), FPalign);
+	if((Fxsave*)fps != aligned)
+		memmove(aligned, (Fxsave*)fps, sizeof(Fxsave));
+	_clts();
+	_fxrstor(aligned);
+}
+
+/*
+ * Exception names indexed by status bit number; mathnote() uses
+ * entries 1 to 5.  Bit 0 is decoded separately there.
+ */
+static char* mathmsg[] =
+{
+	nil,	/* handled below */
+	"denormalized operand",
+	"division by zero",
+	"numeric overflow",
+	"numeric underflow",
+	"precision loss",
+};
+
+/*
+ * Post a "sys: fp:" note to the current process describing the
+ * floating point exception recorded in status; pc is reported as
+ * fppc.  Bit 0 takes precedence and is refined by bits 0x40 and
+ * 0x200; otherwise the lowest set bit among bits 1-5 selects the
+ * message from mathmsg[].
+ */
+static void
+mathnote(ulong status, uintptr pc)
+{
+	char *msg, note[ERRMAX];
+	int bit;
+
+	/*
+	 * Some attention should probably be paid here to the
+	 * exception masks and error summary.
+	 */
+	if(status & 0x01){
+		if(!(status & 0x40))
+			msg = "invalid operation";
+		else if(status & 0x200)
+			msg = "stack overflow";
+		else
+			msg = "stack underflow";
+	}else{
+		msg = "unknown exception";
+		for(bit = 1; bit <= 5; bit++){
+			if((1<<bit) & status){
+				msg = mathmsg[bit];
+				break;
+			}
+		}
+	}
+	snprint(note, sizeof note, "sys: fp: %s fppc=%#p status=0x%lux",
+		msg, pc, status);
+	postnote(up, 1, note, NDebug);
+}
+
+/*
+ *  math coprocessor error
+ *
+ *  Handler installed by mathinit() for VectorCERR (and IrqIRQ13).
+ *  Saves the FPU state of the current process, marks it FPinactive
+ *  and posts a note built from the saved status word and rip.
+ *  Both arguments are ignored.
+ */
+static void
+matherror(Ureg*, void*)
+{
+	/*
+	 * Save FPU state to check out the error.
+	 */
+	fpsave(&up->fpsave);
+	up->fpstate = FPinactive;
+	mathnote(up->fpsave.fsw, up->fpsave.rip);
+}
+
+/*
+ *  math coprocessor emulation fault
+ *
+ *  Handler installed by mathinit() for VectorCNA.  Depending on
+ *  up->fpstate it initialises the FPU for a first-time user
+ *  (FPinit), restores previously saved state (FPinactive), or
+ *  panics (FPactive).  If FPillegal is set a note is posted instead
+ *  and nothing else is done.  ureg is only used for the panic message.
+ */
+static void
+mathemu(Ureg *ureg, void*)
+{
+	ulong status, control;
+
+	if(up->fpstate & FPillegal){
+		/* someone did floating point in a note handler */
+		postnote(up, 1, "sys: floating point in note handler", NDebug);
+		return;
+	}
+	switch(up->fpstate){
+	case FPinit:
+		/*
+		 * A process tries to use the FPU for the
+		 * first time and generates a 'device not available'
+		 * exception.
+		 * Turn the FPU on and initialise it for use.
+		 * Set the precision and mask the exceptions
+		 * we don't care about from the generic Mach value.
+		 */
+		_clts();
+		_fninit();
+		_fwait();
+		_fldcw(0x0232);
+		/*
+		 * TODO: sse exceptions
+		 * _ldmxcsr(m->mxcsr);
+		 *
+		 */
+		up->fpstate = FPactive;
+		break;
+	case FPinactive:
+		/*
+		 * Before restoring the state, check for any pending
+		 * exceptions, there's no way to restore the state without
+		 * generating an unmasked exception.
+		 * More attention should probably be paid here to the
+		 * exception masks and error summary.
+		 */
+		status = up->fpsave.fsw;
+		control = up->fpsave.fcw;
+		/* an exception bit set in status but clear in control: report it, stay inactive */
+		if((status & ~control) & 0x07F){
+			mathnote(status, up->fpsave.rip);
+			break;
+		}
+		fprestore(&up->fpsave);
+		up->fpstate = FPactive;
+		break;
+	case FPactive:
+		/* the fault should not happen while the FPU is marked active */
+		panic("math emu pid %ld %s pc %#p", 
+			up->pid, up->text, ureg->pc);
+		break;
+	}
+}
+
+/*
+ *  math coprocessor segment overrun
+ *
+ *  Handler installed by mathinit() for VectorCSO; terminates the
+ *  current process with pexit("math overrun", 0).
+ *  Both arguments are ignored.
+ */
+static void
+mathover(Ureg*, void*)
+{
+	pexit("math overrun", 0);
+}
+
+/*
+ * Install the floating point trap handlers: matherror on
+ * VectorCERR, mathemu on VectorCNA and mathover on VectorCSO.
+ * On a family 3 processor matherror is additionally attached to
+ * IrqIRQ13.
+ */
+void
+mathinit(void)
+{
+	trapenable(VectorCERR, matherror, 0, "matherror");
+	if(X86FAMILY(m->cpuidax) == 3)
+		intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
+	trapenable(VectorCNA, mathemu, 0, "mathemu");
+	trapenable(VectorCSO, mathover, 0, "mathover");
+}
+
+/*
+ * Machine-dependent set-up of a new process: mark its FPU state
+ * as FPinit, call _stts(), and start its cycle accounting from the
+ * current cycle counter.
+ */
+void
+procsetup(Proc *p)
+{
+	p->fpstate = FPinit;
+	_stts();
+	cycles(&p->kentry);
+	p->pcycles = -p->kentry;
+}
+
+/*
+ * Machine-dependent part of fork: the child p inherits the parent's
+ * (up's) kernel entry time and, if the parent has any floating
+ * point state, a copy of it.
+ */
+void
+procfork(Proc *p)
+{
+	int s;
+
+	p->kentry = up->kentry;
+	p->pcycles = -p->kentry;
+
+	/* save floating point state */
+	s = splhi();
+	switch(up->fpstate & ~FPillegal){
+	case FPactive:
+		/* live state: save it first so there is something to copy */
+		fpsave(&up->fpsave);
+		up->fpstate = FPinactive;
+		/* fall through */
+	case FPinactive:
+		p->fpsave = up->fpsave;
+		p->fpstate = FPinactive;
+	}
+	splx(s);
+
+}
+
+/*
+ * Cycle accounting when p is about to run again: add the current
+ * cycle count to kentry and subtract it from pcycles.
+ * Nothing is done when p->kp is set.
+ */
+void
+procrestore(Proc *p)
+{
+	uvlong now;
+
+	if(!p->kp){
+		cycles(&now);
+		p->kentry += now;
+		p->pcycles -= now;
+	}
+}
+
+/*
+ * Called when p stops running on this processor: update its cycle
+ * accounting, put away (or, for a Moribund process, just clear) its
+ * active floating point state, and flush the TLB.
+ */
+void
+procsave(Proc *p)
+{
+	uvlong t;
+
+	cycles(&t);
+	p->kentry -= t;
+	p->pcycles += t;
+
+	if(p->fpstate == FPactive){
+		if(p->state == Moribund){
+			/* the state is not saved, only pending exceptions are cleared */
+			_clts();
+			_fnclex();
+			_stts();
+		}
+		else{
+			/*
+			 * Fpsave() stores without handling pending
+			 * unmasked exceptions. Postnote() can't be called
+			 * here as sleep() already has up->rlock, so
+			 * the handling of pending exceptions is delayed
+			 * until the process runs again and generates an
+			 * emulation fault to activate the FPU.
+			 */
+			fpsave(&p->fpsave);
+		}
+		p->fpstate = FPinactive;
+	}
+
+	/*
+	 * While this processor is in the scheduler, the process could run
+	 * on another processor and exit, returning the page tables to
+	 * the free list where they could be reallocated and overwritten.
+	 * When this processor eventually has to get an entry from the
+	 * trashed page tables it will crash.
+	 *
+	 * If there's only one processor, this can't happen.
+	 * You might think it would be a win not to do this in that case,
+	 * especially on VMware, but it turns out not to matter.
+	 */
+	mmuflushtlb();
+}
+
+/*
+ * Look up the configuration variable named class followed by ctlrno
+ * (e.g. class "ether", ctlrno 0 gives "ether0") and parse its value
+ * into *isa.
+ * Returns 0 if the variable does not exist, 1 otherwise.
+ * The value is tokenized in place into isa->opt[]; recognised
+ * options (type=, port=, irq=, dma=, mem=, size=, freq=, matched
+ * without regard to case) also set the corresponding field.
+ * isa->type defaults to "".
+ */
+int
+isaconfig(char *class, int ctlrno, ISAConf *isa)
+{
+	char key[32], *val, *opt, *end;
+	int n;
+
+	snprint(key, sizeof key, "%s%d", class, ctlrno);
+	val = getconf(key);
+	if(val == nil)
+		return 0;
+
+	isa->type = "";
+	isa->nopt = tokenize(val, isa->opt, NISAOPT);
+	for(n = 0; n < isa->nopt; n++){
+		opt = isa->opt[n];
+		if(cistrncmp(opt, "type=", 5) == 0){
+			isa->type = opt + 5;
+			continue;
+		}
+		if(cistrncmp(opt, "port=", 5) == 0){
+			isa->port = strtoul(opt+5, &end, 0);
+			continue;
+		}
+		if(cistrncmp(opt, "irq=", 4) == 0){
+			isa->irq = strtoul(opt+4, &end, 0);
+			continue;
+		}
+		if(cistrncmp(opt, "dma=", 4) == 0){
+			isa->dma = strtoul(opt+4, &end, 0);
+			continue;
+		}
+		if(cistrncmp(opt, "mem=", 4) == 0){
+			isa->mem = strtoul(opt+4, &end, 0);
+			continue;
+		}
+		if(cistrncmp(opt, "size=", 5) == 0){
+			isa->size = strtoul(opt+5, &end, 0);
+			continue;
+		}
+		if(cistrncmp(opt, "freq=", 5) == 0)
+			isa->freq = strtoul(opt+5, &end, 0);
+	}
+	return 1;
+}
--- /dev/null
+++ b/sys/src/9/pc64/mem.h
@@ -1,0 +1,164 @@
+/*
+ * Memory and machine-specific definitions.  Used in C and assembler.
+ */
+/*
+ * Binary size units.  KiB, MiB and GiB carry the suffix u, the
+ * larger ones ull; mixing the u constants with plain integers in a
+ * product therefore yields a u-typed result.
+ */
+#define KiB		1024u			/* Kibi 0x0000000000000400 */
+#define MiB		1048576u		/* Mebi 0x0000000000100000 */
+#define GiB		1073741824u		/* Gibi 0x0000000040000000 */
+#define TiB		1099511627776ull	/* Tebi 0x0000010000000000 */
+#define PiB		1125899906842624ull	/* Pebi 0x0004000000000000 */
+#define EiB		1152921504606846976ull	/* Exbi 0x1000000000000000 */
+
+/* both evaluate one of their arguments twice */
+#define MIN(a, b)	((a) < (b)? (a): (b))
+#define MAX(a, b)	((a) > (b)? (a): (b))
+
+/* true if p is a multiple of a; a must be a power of two */
+#define ALIGNED(p, a)	(!(((uintptr)(p)) & ((a)-1)))
+
+/*
+ * Sizes
+ */
+#define	BI2BY		8			/* bits per byte */
+#define	BI2WD		32			/* bits per word; NOTE(review): BY2WD below is 8 bytes -- confirm 32 is intended */
+#define	BY2WD		8			/* bytes per word */
+#define	BY2V		8			/* bytes per double word */
+#define	BY2PG		(0x1000ull)		/* bytes per page */
+#define	WD2PG		(BY2PG/BY2WD)		/* words per page */
+#define	BY2XPG		(2*MiB)			/* bytes per big page */
+#define	PGSHIFT		12			/* log(BY2PG) */
+#define	ROUND(s, sz)	(((s)+((sz)-1))&~((sz)-1))	/* round s up to a multiple of sz (a power of two) */
+#define	PGROUND(s)	ROUND(s, BY2PG)		/* round s up to a page boundary */
+#define	BLOCKALIGN	8
+#define	FPalign		16			/* alignment used for the FP save area in main.c */
+
+#define	MAXMACH		32			/* max # cpus system can run */
+
+#define KSTACK		(16*KiB)		/* Size of Proc kernel stack */
+
+/*
+ * Time
+ */
+#define HZ		(100)			/* clock frequency */
+#define MS2HZ		(1000/HZ)		/* millisec per clock tick (10 at HZ=100) */
+#define TK2SEC(t)	((t)/HZ)		/* ticks to seconds */
+
+/*
+ *  Address spaces. User:
+ */
+#define UTZERO		(0x0000000000200000ull)		/* first address in user text */
+#define TSTKTOP		(0x00007ffffffff000ull)		/* USTKSIZE above USTKTOP; presumably the top of the temporary stack used during exec -- confirm */
+#define USTKSIZE	(16*MiB)		/* size of user stack */
+#define USTKTOP		(TSTKTOP-USTKSIZE)	/* end of new stack in sysexec */
+
+/*
+ *  Address spaces. Kernel, sorted by address.
+ */
+#define KZERO		(0xffffffff80000000ull)	/* 2GB identity map of lower 2GB ram */
+#define KTZERO		(KZERO+1*MiB+64*KiB)
+
+#define VMAP		(0xffffffff00000000ull)	/* 2GB identity map of upper 2GB ram */
+#define VMAPSIZE	(2*GiB)
+
+#define	KMAP		(0xffffff7f00000000ull)
+/*
+ * 512GiB, so that KMAP+KMAPSIZE == VMAP.  The ull is required:
+ * GiB is a 32-bit unsigned constant and 512*GiB would wrap to 0.
+ */
+#define KMAPSIZE	(512ull*GiB)
+
+/*
+ * Fundamental addresses - bottom 64kB saved for return to real mode
+ */
+#define	CONFADDR	(KZERO+0x1200ull)		/* info passed from boot loader */
+#define	APBOOTSTRAP	(KZERO+0x3000ull)		/* AP bootstrap code */
+#define	IDTADDR		(KZERO+0x10000ull)		/* idt */
+#define	REBOOTADDR	(0x11000)			/* reboot code - physical address */
+#define CPU0PML4	(KZERO+0x13000ull)		/* bootstrap processor PML4 */
+#define	CPU0GDT		(KZERO+0x17000ull)		/* bootstrap processor GDT */
+#define	CPU0MACH	(KZERO+0x18000ull)		/* Mach for bootstrap processor */
+#define CPU0END		(CPU0MACH+MACHSIZE)		/* first address past the bootstrap Mach */
+
+#define	MACHSIZE	(2*KSTACK)			/* bytes reserved per Mach */
+#define	INIMAP		(8*MiB)		/* 4 pages; size of initial map in l.s */
+
+/*
+ *  known x86 segments (in GDT) and their selectors
+ */
+/* GDT indices */
+#define	NULLSEG	0	/* null segment */
+#define	KESEG	1	/* kernel executable */
+#define KDSEG	2	/* kernel data */
+#define UE32SEG	3	/* user executable 32bit */
+#define	UDSEG	4	/* user data/stack */
+#define	UESEG	5	/* user executable 64bit */
+#define	TSSSEG	8	/* task segment (two descriptors) */
+
+#define	NGDT	10	/* number of GDT entries required */
+
+/* table indicator bit of a selector */
+#define	SELGDT	(0<<2)	/* selector is in gdt */
+#define	SELLDT	(1<<2)	/* selector is in ldt */
+
+/* build a selector from index i, table indicator t and privilege level p */
+#define	SELECTOR(i, t, p)	(((i)<<3) | (t) | (p))
+
+/* kernel selectors use privilege level 0, user selectors level 3 */
+#define	NULLSEL	SELECTOR(NULLSEG, SELGDT, 0)
+#define	KESEL	SELECTOR(KESEG, SELGDT, 0)
+#define	UE32SEL	SELECTOR(UE32SEG, SELGDT, 3)
+#define	UDSEL	SELECTOR(UDSEG, SELGDT, 3)
+#define	UESEL	SELECTOR(UESEG, SELGDT, 3)
+#define	TSSSEL	SELECTOR(TSSSEG, SELGDT, 0)
+
+/*
+ *  fields in segment descriptors
+ */
+/* descriptor types */
+#define	SEGDATA	(0x10<<8)	/* data/stack segment */
+#define	SEGEXEC	(0x18<<8)	/* executable segment */
+#define	SEGTSS	(0x9<<8)	/* TSS segment */
+#define	SEGCG	(0x0C<<8)	/* call gate */
+#define	SEGIG	(0x0E<<8)	/* interrupt gate */
+#define	SEGTG	(0x0F<<8)	/* trap gate */
+#define	SEGLDT	(0x02<<8)	/* local descriptor table */
+#define	SEGTYPE	(0x1F<<8)	/* mask covering the type values above */
+
+/* descriptor attribute bits; SEGB/SEGD and SEGW/SEGR share a bit */
+#define	SEGP	(1<<15)		/* segment present */
+#define	SEGPL(x) ((x)<<13)	/* priority level */
+#define	SEGB	(1<<22)		/* granularity 1==4k (for expand-down) */
+#define	SEGD	(1<<22)		/* default 1==32bit (for code) */
+#define	SEGE	(1<<10)		/* expand down */
+#define	SEGW	(1<<9)		/* writable (for data/stack) */
+#define	SEGR	(1<<9)		/* readable (for code) */
+#define SEGL	(1<<21)		/* 64 bit */
+#define	SEGG	(1<<23)		/* granularity 1==4k (for other) */
+
+/*
+ *  virtual MMU
+ */
+#define	PTEMAPMEM	(1024*1024)		/* bytes covered by one table (1MB) */
+#define	PTEPERTAB	(PTEMAPMEM/BY2PG)	/* entries per table */
+#define	SEGMAPSIZE	1984
+#define	SSEGMAPSIZE	16
+#define	PPN(x)		((x)&~((uintptr)BY2PG-1))	/* x with the in-page offset bits cleared */
+
+/*
+ *  physical MMU
+ *  (page table entry bits; PTERONLY and PTEKERNEL are zero and
+ *  only name the absence of PTEWRITE and PTEUSER)
+ */
+#define	PTEVALID	(1ull<<0)
+#define	PTEWT		(1ull<<3)
+#define	PTEUNCACHED	(1ull<<4)
+#define	PTEWRITE	(1ull<<1)
+#define	PTERONLY	(0ull<<1)
+#define	PTEKERNEL	(0ull<<2)
+#define	PTEUSER		(1ull<<2)
+#define	PTESIZE		(1ull<<7)
+#define	PTEGLOBAL	(1ull<<8)
+
+/*
+ * Hierarchical Page Tables.
+ * For example, traditional IA-32 paging structures have 2 levels,
+ * level 1 is the PD, and level 0 the PT pages; with IA-32e paging,
+ * level 3 is the PML4(!), level 2 the PDP, level 1 the PD,
+ * and level 0 the PT pages. The PTLX macro gives an index into the
+ * page-table page at level 'l' for the virtual address 'v'.
+ */
+#define PTSZ		(4*KiB)			/* page table page size */
+#define PTSHIFT		9			/* log2 of the entries per page-table page (512) */
+
+/* index (0..511) into the level-l table for virtual address v */
+#define PTLX(v, l)	(((v)>>(((l)*PTSHIFT)+PGSHIFT)) & ((1<<PTSHIFT)-1))
+/* bytes mapped by one level-l entry: 4KiB, 2MiB, 1GiB, 512GiB for l = 0..3 */
+#define PGLSZ(l)	(1ull<<(((l)*PTSHIFT)+PGSHIFT))
+
+#define	getpgcolor(a)	0			/* always 0; the argument is ignored */
+
+/* registers holding m and up, used by the entry code in l.s */
+#define RMACH		R15			/* m-> */
+#define RUSER		R14			/* up-> */
--- /dev/null
+++ b/sys/src/9/pc64/memory.c
@@ -1,0 +1,720 @@
+/*
+ * Size memory and create the kernel page-tables on the fly while doing so.
+ * Called from main(), this code should only be run by the bootstrap processor.
+ *
+ * MemMin is what the bootstrap code in l.s has already mapped;
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+
+#define MEMDEBUG	0
+
+/*
+ * Memory types passed to map(), plus local size constants.
+ */
+enum {
+	MemUPA		= 0,		/* unbacked physical address */
+	MemRAM		= 1,		/* physical memory */
+	MemUMB		= 2,		/* upper memory block (<16MB) */
+	MemReserved	= 3,
+	NMemType	= 4,
+
+	KB		= 1024,
+
+	MemMin		= INIMAP,	/* see the note at the top of this file */
+};
+
+/*
+ * One free range in a resource map.  An entry with size 0
+ * terminates the list (see mapprint, mapalloc, mapfree).
+ */
+typedef struct Map Map;
+struct Map {
+	uintptr	size;
+	uintptr	addr;
+};
+
+/*
+ * A named, locked table of Map entries.
+ */
+typedef struct RMap RMap;
+struct RMap {
+	char*	name;		/* printed by mapprint and in diagnostics */
+	Map*	map;		/* first entry */
+	Map*	mapend;		/* mapfree refuses to insert at or beyond this entry */
+
+	Lock;			/* held while the table is searched or modified */
+};
+
+/* 
+ * Memory allocation tracking.
+ */
+/*
+ * Each RMap's mapend points at the last array element, so that
+ * slot is never filled by mapfree and stays available as the
+ * zero-size terminator.
+ */
+static Map mapupa[16];
+static RMap rmapupa = {
+	"unallocated unbacked physical memory",
+	mapupa,
+	&mapupa[nelem(mapupa)-1],
+};
+
+static Map mapram[16];
+static RMap rmapram = {
+	"physical memory",
+	mapram,
+	&mapram[nelem(mapram)-1],
+};
+
+static Map mapumb[64];
+static RMap rmapumb = {
+	"upper memory block",
+	mapumb,
+	&mapumb[nelem(mapumb)-1],
+};
+
+static Map mapumbrw[16];
+static RMap rmapumbrw = {
+	"UMB device memory",
+	mapumbrw,
+	&mapumbrw[nelem(mapumbrw)-1],
+};
+
+/*
+ * Print the name of a resource map, then one line per entry giving
+ * its start, end and size.  The first zero-size entry ends the list.
+ */
+void
+mapprint(RMap *rmap)
+{
+	Map *ent;
+	uintptr start, len;
+
+	print("%s\n", rmap->name);
+	ent = rmap->map;
+	while(ent->size != 0){
+		start = ent->addr;
+		len = ent->size;
+		print("\t%#p %#p (%#p)\n", start, start+len, len);
+		ent++;
+	}
+}
+
+
+/*
+ * Print three 16-bit values read from nvram register pairs
+ * (0x17/0x18, 0x30/0x31, 0x15/0x16), then dump all four
+ * resource maps.
+ * NOTE(review): the print treats maxpa as a count of KB above
+ * 1MB; the meaning of the other two registers is not visible here.
+ */
+void
+memdebug(void)
+{
+	ulong maxpa, maxpa1, maxpa2;
+
+	/* each value is assembled high byte first from two registers */
+	maxpa = (nvramread(0x18)<<8)|nvramread(0x17);
+	maxpa1 = (nvramread(0x31)<<8)|nvramread(0x30);
+	maxpa2 = (nvramread(0x16)<<8)|nvramread(0x15);
+	print("maxpa = %luX -> %luX, maxpa1 = %luX maxpa2 = %luX\n",
+		maxpa, MB+maxpa*KB, maxpa1, maxpa2);
+
+	mapprint(&rmapram);
+	mapprint(&rmapumb);
+	mapprint(&rmapumbrw);
+	mapprint(&rmapupa);
+}
+
+/*
+ * Add the range [addr, addr+size) to rmap.
+ * The table is kept sorted by address; the new range is merged
+ * with the preceding and/or following entry when it abuts them,
+ * otherwise it is inserted and the later entries are shifted up
+ * by one.  If the insertion would reach rmap->mapend, the range
+ * (or the entry pushed off the end) is dropped with a message.
+ */
+void
+mapfree(RMap* rmap, uintptr addr, uintptr size)
+{
+	Map *mp;
+	uintptr t;
+
+	if(size <= 0)
+		return;
+
+	lock(rmap);
+	/* find the first entry that starts above addr (or the terminator) */
+	for(mp = rmap->map; mp->addr <= addr && mp->size; mp++)
+		;
+
+	if(mp > rmap->map && (mp-1)->addr+(mp-1)->size == addr){
+		/* abuts the end of the previous entry: grow it */
+		(mp-1)->size += size;
+		if(addr+size == mp->addr){
+			/* also reaches the next entry: absorb it and close the gap */
+			(mp-1)->size += mp->size;
+			while(mp->size){
+				mp++;
+				(mp-1)->addr = mp->addr;
+				(mp-1)->size = mp->size;
+			}
+		}
+	}
+	else{
+		if(addr+size == mp->addr && mp->size){
+			/* abuts the start of the next entry: extend it downwards */
+			mp->addr -= size;
+			mp->size += size;
+		}
+		else do{
+			/*
+			 * insert here, carrying the displaced entry along
+			 * in addr/size until the terminator has been moved.
+			 */
+			if(mp >= rmap->mapend){
+				print("mapfree: %s: losing %#p, %#p\n",
+					rmap->name, addr, size);
+				break;
+			}
+			t = mp->addr;
+			mp->addr = addr;
+			addr = t;
+			t = mp->size;
+			mp->size = size;
+			mp++;
+		}while(size = t);
+	}
+	unlock(rmap);
+}
+
+/*
+ * Remove size bytes from rmap and return their start address,
+ * or 0 if no entry can satisfy the request.
+ * If addr is non-zero the allocation must start exactly there
+ * (subject to align); otherwise the first entry that fits is used.
+ * If align > 0 the start is rounded up to a multiple of align.
+ * Any space skipped at the front of the chosen entry is handed
+ * back with mapfree after the lock has been released.
+ */
+uintptr
+mapalloc(RMap* rmap, uintptr addr, int size, int align)
+{
+	Map *mp;
+	uintptr maddr, oaddr;
+
+	lock(rmap);
+	for(mp = rmap->map; mp->size; mp++){
+		maddr = mp->addr;
+
+		if(addr){
+			/*
+			 * A specific address range has been given:
+			 *   if the current map entry is greater then
+			 *   the address is not in the map;
+			 *   if the current map entry does not overlap
+			 *   the beginning of the requested range then
+			 *   continue on to the next map entry;
+			 *   if the current map entry does not entirely
+			 *   contain the requested range then the range
+			 *   is not in the map.
+			 */
+			if(maddr > addr)
+				break;
+			if(mp->size < addr - maddr)	/* maddr+mp->size < addr, but no overflow */
+				continue;
+			if(addr - maddr > mp->size - size)	/* addr+size > maddr+mp->size, but no overflow */
+				break;
+			maddr = addr;
+		}
+
+		if(align > 0)
+			maddr = ((maddr+align-1)/align)*align;
+		/* not enough room left in this entry after alignment */
+		if(mp->addr+mp->size-maddr < size)
+			continue;
+
+		/* the entry now begins just past the allocated range */
+		oaddr = mp->addr;
+		mp->addr = maddr+size;
+		mp->size -= maddr-oaddr+size;
+		if(mp->size == 0){
+			/* entry used up: shift the rest down, terminator included */
+			do{
+				mp++;
+				(mp-1)->addr = mp->addr;
+			}while((mp-1)->size = mp->size);
+		}
+
+		unlock(rmap);
+		/* give back the part in front of the allocation */
+		if(oaddr != maddr)
+			mapfree(rmap, oaddr, maddr-oaddr);
+
+		return maddr;
+	}
+	unlock(rmap);
+
+	return 0;
+}
+
+/*
+ * Take one page-aligned page straight out of the ram map and
+ * return its kernel virtual address, or nil when the map has
+ * nothing left.  Used to build page tables:
+ * called by mmuwalk during e820scan.
+ */
+void*
+rampage(void)
+{
+	uintptr pa;
+
+	pa = mapalloc(&rmapram, 0, BY2PG, BY2PG);
+	if(pa != 0)
+		return KADDR(pa);
+	return nil;
+}
+
+/*
+ * Honour the "umbexclude" configuration variable: a list of
+ * start-end address pairs separated by commas.  Each range
+ * (end inclusive) is removed from the UMB map by allocating it.
+ * Parsing stops at the first malformed or empty range.
+ */
+static void
+umbexclude(void)
+{
+	int size;
+	ulong addr;
+	char *op, *p, *rptr;
+
+	if((p = getconf("umbexclude")) == nil)
+		return;
+
+	while(p && *p != '\0' && *p != '\n'){
+		op = p;		/* kept for error messages */
+		addr = strtoul(p, &rptr, 0);
+		if(rptr == nil || rptr == p || *rptr != '-'){
+			print("umbexclude: invalid argument <%s>\n", op);
+			break;
+		}
+		p = rptr+1;
+
+		/* the second number is the last address of the range */
+		size = strtoul(p, &rptr, 0) - addr + 1;
+		if(size <= 0){
+			print("umbexclude: bad range <%s>\n", op);
+			break;
+		}
+		/* note: the separator in the configuration string is overwritten */
+		if(rptr != nil && *rptr == ',')
+			*rptr++ = '\0';
+		p = rptr;
+
+		mapalloc(&rmapumb, addr, size, 0);
+	}
+}
+
+/*
+ * Probe 0xD0000-0xE0000 in 2KB steps and the 64KB at 0xE0000,
+ * recording what is found in rmapumb (unused address space) and
+ * rmapumbrw (read-write device memory), then apply umbexclude.
+ */
+static void
+umbscan(void)
+{
+	uchar *p;
+
+	/*
+	 * Scan the Upper Memory Blocks (0xA0000->0xF0000) for pieces
+	 * which aren't used; they can be used later for devices which
+	 * want to allocate some virtual address space.
+	 * Check for two things:
+	 * 1) device BIOS ROM. This should start with a two-byte header
+	 *    of 0x55 0xAA, followed by a byte giving the size of the ROM
+	 *    in 512-byte chunks. These ROM's must start on a 2KB boundary.
+	 * 2) device memory. This is read-write.
+	 * There are some assumptions: there's VGA memory at 0xA0000 and
+	 * the VGA BIOS ROM is at 0xC0000. Also, if there's no ROM signature
+	 * at 0xE0000 then the whole 64KB up to 0xF0000 is theoretically up
+	 * for grabs; check anyway.
+	 */
+	p = KADDR(0xD0000);
+	while(p < (uchar*)KADDR(0xE0000)){
+		/*
+		 * Test for 0x55 0xAA before poking obtrusively,
+		 * some machines (e.g. Thinkpad X20) seem to map
+		 * something dynamic here (cardbus?) causing weird
+		 * problems if it is changed.
+		 */
+		if(p[0] == 0x55 && p[1] == 0xAA){
+			/* skip the ROM; p[2] is its length in 512-byte units */
+			p += p[2]*512;
+			continue;
+		}
+
+		/* write a pattern at both ends of the 2KB block and read it back */
+		p[0] = 0xCC;
+		p[2*KB-1] = 0xCC;
+		if(p[0] != 0xCC || p[2*KB-1] != 0xCC){
+			/*
+			 * not writable: try to plant a ROM header; if it
+			 * reads back, treat the block as a 4*512 byte ROM.
+			 */
+			p[0] = 0x55;
+			p[1] = 0xAA;
+			p[2] = 4;
+			if(p[0] == 0x55 && p[1] == 0xAA){
+				p += p[2]*512;
+				continue;
+			}
+			/* reads as 0xFF 0xFF: recorded as free UMB space */
+			if(p[0] == 0xFF && p[1] == 0xFF)
+				mapfree(&rmapumb, PADDR(p), 2*KB);
+		}
+		else
+			mapfree(&rmapumbrw, PADDR(p), 2*KB);
+		p += 2*KB;
+	}
+
+	p = KADDR(0xE0000);
+	if(p[0] != 0x55 || p[1] != 0xAA){
+		p[0] = 0xCC;
+		p[64*KB-1] = 0xCC;
+		/* free only if neither end held the pattern */
+		if(p[0] != 0xCC && p[64*KB-1] != 0xCC)
+			mapfree(&rmapumb, PADDR(p), 64*KB);
+	}
+
+	umbexclude();
+}
+
+/*
+ * Return the sum, modulo 256, of the n bytes starting at v.
+ */
+int
+checksum(void *v, int n)
+{
+	uchar *bp, sum;
+	int i;
+
+	bp = v;
+	sum = 0;
+	for(i = 0; i < n; i++)
+		sum += bp[i];
+	return sum;
+}
+
+/*
+ * Look for signature in the len bytes at addr, testing every
+ * 16th byte offset.  Returns a pointer to the match, or nil.
+ */
+static void*
+sigscan(uchar* addr, int len, char* signature)
+{
+	uchar *cur, *limit;
+	int siglen;
+
+	siglen = strlen(signature);
+	limit = addr+len;
+	cur = addr;
+	while(cur+siglen < limit){
+		if(memcmp(cur, signature, siglen) == 0)
+			return cur;
+		cur += 16;
+	}
+	return nil;
+}
+
+/*
+ * Search the conventional BIOS locations for a structure that
+ * begins with signature; returns its address or nil.
+ */
+void*
+sigsearch(char* signature)
+{
+	uintptr p;
+	uchar *bda;
+	void *r;
+
+	/*
+	 * Search for the data structure:
+	 * 1) within the first KiB of the Extended BIOS Data Area (EBDA), or
+	 * 2) within the last KiB of system base memory if the EBDA segment
+	 *    is undefined, or
+	 * 3) within the BIOS ROM address space between 0xf0000 and 0xfffff
+	 *    (but will actually check 0xe0000 to 0xfffff).
+	 */
+	bda = KADDR(0x400);
+	/* the EBDA pointer is only consulted when "EISA" is found at 0xfffd9 */
+	if(memcmp(KADDR(0xfffd9), "EISA", 4) == 0){
+		/* 16-bit segment at bda+0x0e; shifted by 4 to get an address */
+		if((p = (bda[0x0f]<<8)|bda[0x0e]) != 0){
+			if((r = sigscan(KADDR(p<<4), 1024, signature)) != nil)
+				return r;
+		}
+	}
+
+	/* 16-bit size in KiB at bda+0x13; scan the KiB just below it */
+	if((p = ((bda[0x14]<<8)|bda[0x13])*1024) != 0){
+		if((r = sigscan(KADDR(p-1024), 1024, signature)) != nil)
+			return r;
+	}
+	/* hack for virtualbox: look in KiB below 0xa0000 */
+	if((r = sigscan(KADDR(0xa0000-1024), 1024, signature)) != nil)
+		return r;
+
+	return sigscan(KADDR(0xe0000), 0x20000, signature);
+}
+
+/*
+ * Give the memory between the page-rounded end of the kernel
+ * image and MemMin to the ram map, and zero it.
+ * Note: the only call, in meminit, is currently commented out.
+ */
+static void
+lowraminit(void)
+{
+	uintptr pa, x;
+
+	/*
+	 * Initialise the memory bank information for conventional memory
+	 * (i.e. less than 640KB). The base is the first location after the
+	 * bootstrap processor MMU information and the limit is obtained from
+	 * the BIOS data area.
+	 * NOTE(review): the code below uses end and MemMin rather than
+	 * the BIOS data area; the paragraph above looks inherited.
+	 */
+	x = PADDR(PGROUND((uintptr)end));
+	pa = MemMin;
+	if(x > pa)
+		panic("kernel too big");
+	mapfree(&rmapram, x, pa-x);
+	memset(KADDR(x), 0, pa-x);		/* keep us honest */
+}
+
+/*
+ * One parsed entry of the e820 memory map; filled in by e820scan.
+ */
+typedef struct Emap Emap;
+struct Emap
+{
+	int type;	/* 1 is treated as RAM by e820scan, anything else as reserved */
+	uvlong base;	/* first address */
+	uvlong top;	/* address just past the end */
+};
+static Emap emap[128];
+int nemap;		/* number of valid entries in emap */
+
+/*
+ * qsort comparison for Emap entries: order by top address,
+ * breaking ties by base address.
+ */
+static int
+emapcmp(const void *va, const void *vb)
+{
+	Emap *x, *y;
+
+	x = (Emap*)va;
+	y = (Emap*)vb;
+	if(x->top != y->top)
+		return x->top < y->top ? -1 : 1;
+	if(x->base != y->base)
+		return x->base < y->base ? -1 : 1;
+	return 0;
+}
+
+/*
+ * Record the physical range [base, base+len) of the given type
+ * (MemRAM, MemUMB, MemUPA or MemReserved) in the matching resource
+ * map and, for RAM and UMB, map it into the kernel at base+KZERO.
+ * Ranges are split recursively so that the final case never crosses
+ * MemMin, 16MB, CPU0END or the kernel image.
+ */
+static void
+map(uintptr base, uintptr len, int type)
+{
+	uintptr e, n, *pte, flags, maxkpa, kend;
+
+	/*
+	 * Split any call crossing MemMin to make below simpler.
+	 * (No return here: the original call has base < MemMin and
+	 * is discarded by the next test.)
+	 */
+	if(base < MemMin && len > MemMin-base){
+		n = MemMin - base;
+		map(base, n, type);
+		map(MemMin, len-n, type);
+	}
+
+	/*
+	 * Let lowraminit and umbscan hash out the low MemMin.
+	 */
+	if(base < MemMin)
+		return;
+
+	/*
+	 * Any non-memory below 16*MB is used as upper mem blocks.
+	 */
+	if(type == MemUPA && base < 16*MB && len > 16*MB-base){
+		map(base, 16*MB-base, MemUMB);
+		map(16*MB, len-(16*MB-base), MemUPA);
+		return;
+	}
+
+	/*
+	 * Memory below CPU0END is reserved for the kernel
+	 * and already mapped.
+	 */
+	if(base < PADDR(CPU0END)){
+		n = PADDR(CPU0END) - base;
+		if(len <= n)
+			return;
+		map(PADDR(CPU0END), len-n, type);
+		return;
+	}
+
+	/*
+	 * Memory between KTZERO and end is the kernel itself
+	 * and is already mapped.
+	 *
+	 * A range straddling KTZERO is split there; the upper part
+	 * is passed on so that whatever lies beyond the kernel image
+	 * is not lost.
+	 */
+	kend = PADDR(PGROUND((uintptr)end));
+	if(base < PADDR(KTZERO) && len > PADDR(KTZERO)-base){
+		n = PADDR(KTZERO) - base;
+		map(base, n, type);
+		map(PADDR(KTZERO), len-n, type);
+		return;
+	}
+	/*
+	 * A range starting inside the image (including exactly at
+	 * KTZERO) skips ahead to the end of the image.  n is the
+	 * distance from base to the end of the image, not the end
+	 * address itself.
+	 */
+	if(PADDR(KTZERO) <= base && base < kend){
+		n = kend - base;
+		if(len <= n)
+			return;
+		map(kend, len-n, type);
+		return;
+	}
+
+	/*
+	 * Now we have a simple case.
+	 */
+	switch(type){
+	case MemRAM:
+		mapfree(&rmapram, base, len);
+		flags = PTEWRITE|PTEVALID;
+		break;
+	case MemUMB:
+		mapfree(&rmapumb, base, len);
+		flags = PTEWRITE|PTEUNCACHED|PTEVALID;
+		break;
+	case MemUPA:
+		mapfree(&rmapupa, base, len);
+		flags = 0;
+		break;
+	default:
+	case MemReserved:
+		flags = 0;
+		break;
+	}
+
+	/*
+	 * bottom MemMin is already mapped - just twiddle flags.
+	 * (not currently used - see above)
+	 */
+	if(base < MemMin){
+		e = base+len;
+		base &= ~((uintptr)PGLSZ(1)-1);
+		for(; base<e; base+=PGLSZ(1)){
+			pte = mmuwalk(m->pml4, base+KZERO, 1, 0);
+			if(pte != 0 && *pte & PTEVALID)
+				*pte |= flags;
+		}
+		return;
+	}
+
+	/*
+	 * Only memory that fits below -KZERO can be addressed
+	 * through KZERO; clip the range to that window.
+	 */
+	if(flags){
+		maxkpa = -KZERO;
+		if(base >= maxkpa)
+			return;
+		if(len > maxkpa-base)
+			len = maxkpa - base;
+		pmap(m->pml4, base|flags, base+KZERO, len);
+	}
+}
+
+/*
+ * Parse the memory map passed in the "*e820" (or "e820")
+ * configuration variable and feed every range to map().
+ * Each entry is "base top" in hex, optionally preceded by a
+ * single-digit type and a space; entries without a type are
+ * taken to be type 1.  Returns -1 if there is no usable map,
+ * 0 otherwise.
+ */
+static int
+e820scan(void)
+{
+	uintptr base, len, last;
+	Emap *e;
+	char *s;
+	int i;
+
+	/* passed by bootloader */
+	if((s = getconf("*e820")) == nil)
+		if((s = getconf("e820")) == nil)
+			return -1;
+	nemap = 0;
+	while(nemap < nelem(emap)){
+		while(*s == ' ')
+			s++;
+		if(*s == 0)
+			break;
+		e = emap + nemap;
+		e->type = 1;
+		if(s[1] == ' '){	/* new format */
+			e->type = s[0] - '0';
+			s += 2;
+		}
+		e->base = strtoull(s, &s, 16);
+		if(*s != ' ')
+			break;
+		e->top  = strtoull(s, &s, 16);
+		if(*s != ' ' && *s != 0)
+			break;
+		/* empty or inverted ranges are parsed but not kept */
+		if(e->base < e->top)
+			nemap++;
+	}
+	if(nemap == 0)
+		return -1;
+	qsort(emap, nemap, sizeof emap[0], emapcmp);
+	last = 0;	/* end of the address space handled so far */
+	for(i=0; i<nemap; i++){	
+		e = &emap[i];
+		/*
+		 * pull out the info, skipping or trimming anything
+		 * that overlaps what has already been handled.
+		 */
+		if(e->top <= last)
+			continue;
+		if(e->base < last)
+			base = last;
+		else
+			base = e->base;
+		len = e->top - base;
+		/*
+		 * If the map skips addresses, mark them available.
+		 */
+		if(last < base)
+			map(last, base-last, MemUPA);
+		map(base, len, (e->type == 1) ? MemRAM : MemReserved);
+		last = base + len;
+		/* wrapped to 0: the whole address space has been covered */
+		if(last == 0)
+			break;
+	}
+	/* everything above the last entry is unbacked address space */
+	if(last != 0)
+		map(last, -last, MemUPA);
+	return 0;
+}
+
+/*
+ * Discover memory (umbscan, e820scan) and copy the resulting
+ * ram map into conf.mem.  Ram map entries that do not fit in
+ * conf.mem are counted and reported as lost.
+ */
+void
+meminit(void)
+{
+	int i;
+	Map *mp;
+	Confmem *cm;
+	uintptr lost;
+
+	umbscan();
+	// lowraminit();
+	e820scan();
+
+	/*
+	 * Set the conf entries describing banks of allocatable memory.
+	 */
+	for(i=0; i<nelem(mapram) && i<nelem(conf.mem); i++){
+		mp = &rmapram.map[i];
+		cm = &conf.mem[i];
+		cm->base = mp->addr;
+		cm->npage = mp->size/BY2PG;
+	}
+
+	/* i continues from the loop above */
+	lost = 0;
+	for(; i<nelem(mapram); i++)
+		lost += rmapram.map[i].size;
+	if(lost)
+		print("meminit - lost %llud bytes\n", lost);
+
+	if(MEMDEBUG)
+		memdebug();
+}
+
+/*
+ * Allocate size bytes from the upper memory block map and return
+ * the kernel virtual address of the range, or 0 on failure.
+ * addr and align are passed through to mapalloc.
+ */
+uintptr
+umbmalloc(uintptr addr, int size, int align)
+{
+	uintptr pa;
+
+	pa = mapalloc(&rmapumb, addr, size, align);
+	if(pa == 0)
+		return 0;
+	return (uintptr)KADDR(pa);
+}
+
+/*
+ * Return a range obtained from umbmalloc; addr is the kernel
+ * virtual address that umbmalloc returned.
+ */
+void
+umbfree(uintptr addr, int size)
+{
+	mapfree(&rmapumb, PADDR(addr), size);
+}
+
+/*
+ * Allocate read-write upper memory and return its kernel virtual
+ * address, or 0 on failure.  The UMB device memory map is tried
+ * first; failing that, a range is taken from the plain UMB map
+ * and kept only if a test pattern written to its first and last
+ * byte reads back.
+ */
+uintptr
+umbrwmalloc(uintptr addr, int size, int align)
+{
+	uintptr pa, va;
+	uchar *first, *last;
+
+	pa = mapalloc(&rmapumbrw, addr, size, align);
+	if(pa != 0)
+		return (uintptr)KADDR(pa);
+
+	/*
+	 * Perhaps the memory wasn't visible before
+	 * the interface is initialised, so try again.
+	 */
+	va = umbmalloc(addr, size, align);
+	if(va == 0)
+		return 0;
+	first = (uchar*)va;
+	last = first + (size-1);
+	*first = 0xCC;
+	*last = 0xCC;
+	if(*first != 0xCC || *last != 0xCC){
+		umbfree(va, size);
+		return 0;
+	}
+	return va;
+}
+
+/*
+ * Add the range at kernel virtual address addr to the
+ * UMB device memory map.
+ */
+void
+umbrwfree(uintptr addr, int size)
+{
+	mapfree(&rmapumbrw, PADDR(addr), size);
+}
+
+/*
+ * Hand out physical address space that nothing else is using,
+ * for configuring devices.  The result is a physical address
+ * which is not mapped into virtual memory; call vmap for that.
+ * On failure a message and the map are printed and 0 is returned.
+ */
+uintptr
+upaalloc(int size, int align)
+{
+	uintptr pa;
+
+	pa = mapalloc(&rmapupa, 0, size, align);
+	if(pa != 0)
+		return pa;
+	print("out of physical address space allocating %d\n", size);
+	mapprint(&rmapupa);
+	return 0;
+}
+
+/*
+ * Return physical address space to the unbacked address map.
+ */
+void
+upafree(uintptr pa, int size)
+{
+	mapfree(&rmapupa, pa, size);
+}
+
+/*
+ * Claim the range at pa in the unbacked physical address map.
+ * If the map hands back a different address it is returned to
+ * the map; if it hands back nothing the failure is ignored.
+ */
+void
+upareserve(uintptr pa, int size)
+{
+	uintptr got;
+
+	got = mapalloc(&rmapupa, pa, size, 0);
+	if(got == pa)
+		return;
+
+	/*
+	 * This can happen when we're using the E820
+	 * map, which might have already reserved some
+	 * of the regions claimed by the pci devices.
+	 */
+//	print("upareserve: cannot reserve pa=%#p size=%d\n", pa, size);
+	if(got != 0)
+		mapfree(&rmapupa, got, size);
+}
+
+/*
+ * Externally visible wrapper around memdebug.
+ */
+void
+memorysummary(void)
+{
+	memdebug();
+}
+
--- /dev/null
+++ b/sys/src/9/pc64/mkfile
@@ -1,0 +1,149 @@
+CONF=pc64
+CONFLIST=pc64
+
+objtype=amd64
+</$objtype/mkfile
+p=9
+
+KTZERO=0xffffffff80110000
+APBOOTSTRAP=0xffffffff80003000
+
+DEVS=`{rc ../port/mkdevlist $CONF}
+
+PORT=\
+	alarm.$O\
+	alloc.$O\
+	allocb.$O\
+	auth.$O\
+	cache.$O\
+	chan.$O\
+	dev.$O\
+	edf.$O\
+	fault.$O\
+	page.$O\
+	parse.$O\
+	pgrp.$O\
+	portclock.$O\
+	print.$O\
+	proc.$O\
+	qio.$O\
+	qlock.$O\
+	rdb.$O\
+	rebootcmd.$O\
+	segment.$O\
+	swap.$O\
+	syscallfmt.$O\
+	sysfile.$O\
+	sysproc.$O\
+	taslock.$O\
+	tod.$O\
+	xalloc.$O\
+	random.$O\
+
+OBJ=\
+	l.$O\
+	cga.$O\
+	i8253.$O\
+	i8259.$O\
+	main.$O\
+	memory.$O\
+	mmu.$O\
+	trap.$O\
+	$CONF.root.$O\
+	$CONF.rootc.$O\
+	$DEVS\
+	$PORT\
+
+LIB=\
+	/$objtype/lib/libmemlayer.a\
+	/$objtype/lib/libmemdraw.a\
+	/$objtype/lib/libdraw.a\
+	/$objtype/lib/libip.a\
+	/$objtype/lib/libsec.a\
+	/$objtype/lib/libmp.a\
+	/$objtype/lib/libc.a\
+	/$objtype/lib/libfis.a\
+	/$objtype/lib/libaml.a\
+
+ETHER=`{cd ../pc; echo devether.c ether*.c | sed 's/\.c/.'$O'/g'}
+AUDIO=`{cd ../pc; echo devaudio.c audio*.c | sed 's/\.c/.'$O'/g'}
+VGA=`{cd ../pc; echo devvga.c screen.c vga*.c | sed 's/\.c/.'$O'/g'}
+SDEV=`{cd ../pc; echo devsd.c sd*.c | sed 's/\.c/.'$O'/g'}
+
+$p$CONF:	$CONF.c $OBJ $LIB
+	$CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c
+	$LD -o $target -T$KTZERO -R4096 -l $OBJ $CONF.$O $LIB
+	size $target
+
+install:V:	$p$CONF
+	cp $p$CONF /$objtype/
+	for(i in $EXTRACOPIES)
+		import $i / /n/$i && cp $p$CONF $p$CONF.gz /n/$i/$objtype/
+
+
+# copies generated by the rule below
+PCHEADERS=wifi.h uncached.h usbehci.h screen.h etherif.h mp.h io.h
+
+&.h:	../pc/&.h
+	cp $prereq .
+
+PCFILES=`{../port/mkfilelist ../pc}
+^($PCFILES)\.$O:R:	'../pc/\1.c'
+	$CC $CFLAGS -I. -. ../pc/$stem1.c
+
+<../boot/bootmkfile
+<../port/portmkfile
+<|../port/mkbootrules $CONF
+
+l.$O apbootstrap.$O:		mem.h
+
+$ETHER: 			etherif.h ../port/netif.h
+$AUDIO:				../port/audioif.h
+ether8003.$O ether8390.$O:	ether8390.h
+etheryuk.$O:			yukdump.h
+$VGA mouse.$O:			screen.h /sys/include/memdraw.h
+vgavesa.$O:			/386/include/ureg.h
+
+archmp.$O mp.$O:		apbootstrap.h
+apic.$O archmp.$O mp.$O:	mp.h
+squidboy.$O:			mp.h
+
+$SDEV:				../port/sd.h
+sdiahci.$O:			ahci.h
+devaoe.$O sdaoe.$O:		../port/aoe.h
+
+main.$O:			init.h
+
+devusb.$O usbuhci.$O usbohci.$O usbehci.$O: ../port/usb.h
+usbehci.$O:			usbehci.h uncached.h
+trap.$O:			/sys/include/tos.h
+etheriwl.$O:			wifi.h
+etherrt2860.$O: 		wifi.h
+wifi.$O:			wifi.h
+
+init.h:D:		../port/initcode.c ../pc/init9.c
+	$CC ../port/initcode.c
+	$CC ../pc/init9.c
+	$LD -l -R1 -s -o init.out init9.$O initcode.$O /$objtype/lib/libc.a
+	{echo 'uchar initcode[]={'
+	 xd -1x <init.out |
+		sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+	 echo '};'} > init.h
+
+apbootstrap.h:	apbootstrap.s
+	$AS apbootstrap.s
+	$LD -l -R1 -s -o apbootstrap.out -T$APBOOTSTRAP apbootstrap.$O
+	{echo 'uchar apbootstrap[]={'
+	 dd -if apbootstrap.out -bs 1 -iseek 40 | 
+	 xd -1x |
+	 sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+	 echo '};'} > $target
+
+sd53c8xx.i:	sd53c8xx.n
+	aux/na $prereq > $target
+
+acid:V:
+	$CC -a -w -I. -. ../pc/i8253.c>acid
+
+%.clean:V:
+	rm -f $stem.c [9bz]$stem [9bz]$stem.gz boot$stem.* apbootstrap.h init.h $PCHEADERS
--- /dev/null
+++ b/sys/src/9/pc64/mmu.c
@@ -1,0 +1,505 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+
+/*
+ * Simple segment descriptors with no translation.
+ * Each macro expands to a { d0, d1 } initializer for a Segdesc
+ * at privilege level p.  EXECSEGM and DATASEGM are used for the
+ * kernel and 64-bit user code entries in gdt[] (EXECSEGM sets
+ * SEGL); the 32SEGM variants additionally fill in the 0xFFFFF limit
+ * (low 16 bits in d0, high 4 bits at d1 bit 16).
+ */
+#define	EXECSEGM(p) 	{ 0, SEGL|SEGP|SEGPL(p)|SEGEXEC }
+#define	DATASEGM(p) 	{ 0, SEGB|SEGG|SEGP|SEGPL(p)|SEGDATA|SEGW }
+#define	EXEC32SEGM(p) 	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
+#define	DATA32SEGM(p) 	{ 0xFFFF, SEGB|SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
+
+/*
+ * Template descriptor table.  mmuinit copies it into m->gdt
+ * and then adds the TSS descriptor at TSSSEG.
+ */
+Segdesc gdt[NGDT] =
+{
+[NULLSEG]	{ 0, 0},		/* null descriptor */
+[KESEG]		EXECSEGM(0),		/* kernel code */
+[KDSEG]		DATASEGM(0),		/* kernel data */
+[UE32SEG]	EXEC32SEGM(3),		/* user code 32 bit*/
+[UDSEG]		DATA32SEGM(3),		/* user data/stack */
+[UESEG]		EXECSEGM(3),		/* user code */
+};
+
+static int didmmuinit = 0;
+
+/*
+ * level: value stored in MMU.level by mmuwalk, i.e. its loop
+ * index at the moment the entry was created (2 when the entry
+ * was installed in the PML4 itself).
+ */
+enum {
+	PML4E	= 2,
+	PDPE	= 1,
+	PDE	= 0,
+
+	MAPBITS	= 8*sizeof(m->mmumap[0]),	/* bits per m->mmumap word */
+};
+
+/*
+ * Build a table pointer on the stack - a 16-bit limit immediately
+ * followed by a 64-bit offset - and pass its address to load
+ * (lgdt or lidt in mmuinit).
+ */
+static void
+loadptr(u16int lim, uintptr off, void (*load)(void*))
+{
+	u64int b[2], *o;
+	u16int *s;
+
+	/* the limit occupies the last two bytes of b[0], right before b[1] */
+	o = &b[1];
+	s = ((u16int*)o)-1;
+
+	*s = lim;
+	*o = off;
+
+	(*load)(s);
+}
+
+/*
+ * Point all three stack slots (rsp0, rsp1, rsp2) of this
+ * processor's Tss at stack, stored as low and high 32-bit
+ * halves, then flush the TLB.
+ */
+static void
+taskswitch(uintptr stack)
+{
+	Tss *t;
+	u32int lo, hi;
+
+	lo = (u32int)stack;
+	hi = stack >> 32;
+
+	t = m->tss;
+	t->rsp0[0] = lo;
+	t->rsp0[1] = hi;
+	t->rsp1[0] = lo;
+	t->rsp1[1] = hi;
+	t->rsp2[0] = lo;
+	t->rsp2[1] = hi;
+	mmuflushtlb();
+}
+
+/*
+ * Per-processor MMU setup: removes the boot-time double map,
+ * allocates and fills the Tss, installs the GDT, IDT and task
+ * register, and programs the MSRs used for FS/GS bases and the
+ * SYSCALL instruction.
+ */
+void
+mmuinit(void)
+{
+	uintptr x;
+	vlong v;
+	int i;
+
+	/* from now on mmuwalk takes kernel page-table pages from mallocalign */
+	didmmuinit = 1;
+
+	/*
+	 * zap double map done by l.s
+	 * NOTE(review): index 512 lies one entry past a 512-entry
+	 * table; presumably the next table page follows pml4 directly
+	 * in memory - confirm against l.s.
+	 */ 
+	m->pml4[0] = 0;
+	m->pml4[512] = 0;
+
+	m->tss = mallocz(sizeof(Tss), 1);
+	if(m->tss == nil)
+		panic("mmuinit: no memory for Tss");
+	m->tss->iomap = 0xDFFF;
+	/* every ist slot pair gets the top of the Mach area, low half first */
+	for(i=0; i<14; i+=2){
+		x = (uintptr)m + MACHSIZE;
+		m->tss->ist[i] = x;
+		m->tss->ist[i+1] = x>>32;
+	}
+
+	/*
+	 * We used to keep the GDT in the Mach structure, but it
+	 * turns out that that slows down access to the rest of the
+	 * page.  Since the Mach structure is accessed quite often,
+	 * it pays off anywhere from a factor of 1.25 to 2 on real
+	 * hardware to separate them (the AMDs are more sensitive
+	 * than Intels in this regard).  Under VMware it pays off
+	 * a factor of about 10 to 100.
+	 */
+	memmove(m->gdt, gdt, sizeof gdt);
+
+	/* the TSS descriptor takes two slots; the second holds address bits 32-63 */
+	x = (uintptr)m->tss;
+	m->gdt[TSSSEG+0].d0 = (x<<16)|(sizeof(Tss)-1);
+	m->gdt[TSSSEG+0].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
+	m->gdt[TSSSEG+1].d0 = x>>32;
+	m->gdt[TSSSEG+1].d1 = 0;
+
+	loadptr(sizeof(gdt)-1, (uintptr)m->gdt, lgdt);
+	loadptr(sizeof(Segdesc)*512-1, (uintptr)IDTADDR, lidt);
+	taskswitch((uintptr)m + MACHSIZE);
+	ltr(TSSSEL);
+
+	wrmsr(0xc0000100, 0ull);	/* 64 bit fsbase */
+	wrmsr(0xc0000101, (uvlong)&machp[m->machno]);	/* 64 bit gsbase */
+	wrmsr(0xc0000102, 0ull);	/* kernel gs base */
+
+	/* enable syscall extension */
+	rdmsr(0xc0000080, &v);
+	v |= 1ull;
+	wrmsr(0xc0000080, v);
+
+	/* IA32_STAR */
+	wrmsr(0xc0000081, ((uvlong)UE32SEL << 48) | ((uvlong)KESEL << 32));
+
+	/* IA32_LSTAR */
+	wrmsr(0xc0000082, (uvlong)syscallentry);
+
+	/* SYSCALL flags mask */
+	wrmsr(0xc0000084, 0x200);
+}
+
+/*
+ * These could go back to being macros once the kernel is debugged,
+ * but the extra checking is nice to have.
+ */
+/*
+ * Convert a physical address to its kernel virtual address in
+ * the KZERO window.  Panics if pa lies outside the window.
+ */
+void*
+kaddr(uintptr pa)
+{
+	/*
+	 * The window covers physical addresses below -KZERO
+	 * (compare cankaddr).  pa == -KZERO itself must be
+	 * rejected too: pa+KZERO would wrap around to 0.
+	 */
+	if(pa >= (uintptr)-KZERO)
+		panic("kaddr: pa=%#p pc=%#p", pa, getcallerpc(&pa));
+	return (void*)(pa+KZERO);
+}
+
+/*
+ * Convert a kernel virtual address to a physical address.
+ * Addresses at or above KZERO are taken relative to KZERO,
+ * other addresses at or above VMAP relative to VMAP; anything
+ * lower causes a panic.
+ */
+uintptr
+paddr(void *v)
+{
+	uintptr a;
+
+	a = (uintptr)v;
+	if(a >= KZERO)
+		return a-KZERO;
+	if(a < VMAP){
+		panic("paddr: va=%#p pc=%#p", a, getcallerpc(&v));
+		return 0;
+	}
+	return a-VMAP;
+}
+
+/*
+ * Take one MMU structure (with its page-table page attached)
+ * from this processor's free list, refilling the list with a
+ * batch of 256 when it is empty.  Panics when out of memory.
+ * The returned structure has next set to nil.
+ */
+static MMU*
+mmualloc(void)
+{
+	MMU *p;
+	int i, n;
+
+	p = m->mmufree;
+	if(p == nil){
+		n = 256;
+		p = malloc(n * sizeof(MMU));
+		if(p == nil)
+			panic("mmualloc: out of memory for MMU");
+		/* one contiguous, page-aligned run of n table pages */
+		p->page = mallocalign(n * PTSZ, BY2PG, 0, 0);
+		if(p->page == nil)
+			panic("mmualloc: out of memory for MMU pages");
+		/* hand each MMU the next page and chain them together */
+		for(i=1; i<n; i++){
+			p[i].page = p[i-1].page + (1<<PTSHIFT);
+			p[i-1].next = &p[i];
+		}
+		m->mmucount += n;
+	}
+	m->mmucount--;
+	m->mmufree = p->next;
+	p->next = nil;
+	return p;
+}
+
+/*
+ * Walk the page tables rooted at table (a PML4) and return a
+ * pointer to the entry for va in the table at the given level
+ * (0 is the lowest).  Returns 0 if an intermediate entry is
+ * missing and create is zero, or if a large-page entry (PTESIZE)
+ * is found on the way down.
+ *
+ * With create set, missing tables are allocated:
+ *  - for va below VMAP from mmualloc, and recorded on the current
+ *    process's MMU list (up->mmuhead/mmutail);
+ *  - otherwise from mallocalign, or from rampage while mmuinit
+ *    has not yet run.
+ */
+uintptr*
+mmuwalk(uintptr* table, uintptr va, int level, int create)
+{
+	uintptr pte, *page;
+	int i, x;
+	MMU *p;
+
+	x = PTLX(va, 3);
+	for(i = 2; i >= level; i--){
+		pte = table[x];
+		if(pte & PTEVALID){
+			if(pte & PTESIZE)
+				return 0;
+			table = KADDR(PPN(pte));
+		} else {
+			if(!create)
+				return 0;
+			pte = PTEWRITE|PTEVALID;
+			if(va < VMAP){
+				if(va < TSTKTOP)
+					pte |= PTEUSER;
+				p = mmualloc();
+				p->index = x;
+				p->level = i;
+				if(i == PML4E){
+					/* PML4 entries linked to head */
+					p->next = up->mmuhead;
+					if(p->next == nil)
+						up->mmutail = p;
+					up->mmuhead = p;
+					if(p->index <= PTLX(TSTKTOP, 3))
+						m->mmumap[p->index/MAPBITS] |= 1ull<<(p->index%MAPBITS);
+				} else {
+					/*
+					 * PDP and PD entries linked to tail.
+					 * The tail must advance, otherwise the
+					 * next entry overwrites this link and
+					 * this MMU drops off the list for good.
+					 */
+					up->mmutail->next = p;
+					up->mmutail = p;
+				}
+				/* mmufree credits this count back to m->mmucount */
+				up->mmucount++;
+				page = p->page;
+			} else if(didmmuinit) {
+				page = mallocalign(PTSZ, BY2PG, 0, 0);
+			} else
+				page = rampage();
+			/* both allocators above can return nil */
+			if(page == nil)
+				panic("mmuwalk: no memory for page table va=%#p", va);
+			memset(page, 0, PTSZ);
+			table[x] = PADDR(page) | pte;
+			table = page;
+		}
+		x = PTLX(va, i);
+	}
+	return &table[x];
+}
+
+/*
+ * Number of entries from the one for va up to the end of the
+ * level 'level' table that contains it.
+ */
+static int
+ptecount(uintptr va, int level)
+{
+	uintptr off;
+
+	/* offset of va within the span of one whole table at this level */
+	off = va & (PGLSZ(level+1)-1);
+	return (1<<PTSHIFT) - off / PGLSZ(level);
+}
+
+/*
+ * Map size bytes of physical memory at virtual address va in the
+ * page tables rooted at pml4.  pa carries the physical address in
+ * its page-number bits and the PTE flags in its low bits.
+ * Level 1 (large) entries are used once the remaining size is at
+ * least PGLSZ(1) and va is PGLSZ(1)-aligned.  Panics if size <= 0,
+ * va is below VMAP, or a table entry cannot be obtained.
+ */
+void
+pmap(uintptr *pml4, uintptr pa, uintptr va, int size)
+{
+	uintptr *pte, *ptee, flags;
+	int z, l;
+
+	if((size <= 0) || va < VMAP)
+		panic("pmap: pa=%#p va=%#p size=%d", pa, va, size);
+	/* separate the flag bits from the page address */
+	flags = pa;
+	pa = PPN(pa);
+	flags -= pa;
+	if(va >= KZERO)
+		flags |= PTEGLOBAL;
+	while(size > 0){
+		/* note: once set, PTESIZE stays set for the rest of the range */
+		if(size >= PGLSZ(1) && (va % PGLSZ(1)) == 0)
+			flags |= PTESIZE;
+		l = (flags & PTESIZE) != 0;
+		z = PGLSZ(l);
+		pte = mmuwalk(pml4, va, l, 1);
+		if(pte == 0){
+			/*
+			 * mmuwalk hit a large-page entry one level up:
+			 * back up to the start of that large page and
+			 * redo it as a large mapping.
+			 */
+			pte = mmuwalk(pml4, va, ++l, 0);
+			if(pte && (*pte & PTESIZE)){
+				flags |= PTESIZE;
+				z = va & PGLSZ(l)-1;
+				va -= z;
+				pa -= z;
+				size += z;
+				continue;
+			}
+			panic("pmap: pa=%#p va=%#p size=%d", pa, va, size);
+		}
+		/* fill entries up to the end of this table, then walk again */
+		ptee = pte + ptecount(va, l);
+		while(size > 0 && pte < ptee){
+			*pte++ = pa | flags;
+			pa += z;
+			va += z;
+			size -= z;
+		}
+	}
+}
+
+/*
+ * Clear the per-process entries of this processor's PML4:
+ * the KMAP entry and every entry whose bit is set in m->mmumap.
+ * m->mmumap is left all zero.
+ */
+static void
+mmuzap(void)
+{
+	uintptr *pte;
+	u64int w;
+	int i, x;
+
+	pte = m->pml4;
+	pte[PTLX(KMAP, 3)] = 0;
+
+	/* common case */
+	pte[PTLX(UTZERO, 3)] = 0;
+	pte[PTLX(TSTKTOP, 3)] = 0;
+	m->mmumap[PTLX(UTZERO, 3)/MAPBITS] &= ~(1ull<<(PTLX(UTZERO, 3)%MAPBITS));
+	m->mmumap[PTLX(TSTKTOP, 3)/MAPBITS] &= ~(1ull<<(PTLX(TSTKTOP, 3)%MAPBITS));
+
+	/* anything still marked: pte advances one map word per iteration */
+	for(i = 0; i < nelem(m->mmumap); pte += MAPBITS, i++){
+		w = m->mmumap[i];
+		if(w == 0)
+			continue;
+		x = 0;
+		do {
+			if(w & 1)
+				pte[x] = 0;
+			x++;
+			/* consume the bit just examined; ends when none remain */
+			w >>= 1;
+		} while(w);
+		m->mmumap[i] = 0;
+	}
+}
+
+/*
+ * Move the whole MMU list of proc onto the front of this
+ * processor's free list and transfer its count.
+ * Nothing happens when the list is empty.
+ */
+static void
+mmufree(Proc *proc)
+{
+	MMU *tail;
+
+	tail = proc->mmutail;
+	if(tail == nil)
+		return;
+
+	tail->next = m->mmufree;
+	m->mmufree = proc->mmuhead;
+	proc->mmuhead = proc->mmutail = nil;
+	m->mmucount += proc->mmucount;
+	proc->mmucount = 0;
+}
+
+/*
+ * Discard the current process's page tables: sets up->newtlb so
+ * that mmuswitch frees its MMU list, and calls mmuswitch at
+ * splhi.
+ */
+void
+flushmmu(void)
+{
+	int x;
+
+	x = splhi();
+	up->newtlb = 1;
+	mmuswitch(up);
+	splx(x);
+}
+
+/*
+ * Install the address space of proc on this processor:
+ * clear the previous per-process PML4 entries, free proc's
+ * MMU list if newtlb is set, reinstall proc's PML4-level
+ * entries, and point the Tss stacks at proc's kernel stack.
+ */
+void
+mmuswitch(Proc *proc)
+{
+	uintptr pte;
+	MMU *p;
+
+	mmuzap();
+	if(proc->newtlb){
+		mmufree(proc);
+		proc->newtlb = 0;
+	}
+	/* only the leading entries of the list are PML4-level (see mmuwalk) */
+	for(p = proc->mmuhead; p && p->level==PML4E; p = p->next){
+		pte = PADDR(p->page) | PTEWRITE|PTEVALID;
+		if(p->index <= PTLX(TSTKTOP, 3)){
+			/* remember the slot so mmuzap can clear it later */
+			m->mmumap[p->index/MAPBITS] |= 1ull<<(p->index%MAPBITS);
+			pte |= PTEUSER;
+		}
+		m->pml4[p->index] = pte;
+	}
+	taskswitch((uintptr)proc->kstack+KSTACK);
+}
+
+/*
+ * Give up the address space of proc: clear the per-process
+ * PML4 entries, free proc's MMU list and switch the Tss stacks
+ * to the top of the Mach area.
+ */
+void
+mmurelease(Proc *proc)
+{
+	mmuzap();
+	mmufree(proc);
+	taskswitch((uintptr)m+MACHSIZE);
+}
+
+/*
+ * Enter a user mapping for va in this processor's page tables.
+ * pa is or'ed into the entry as given, together with
+ * PTEVALID|PTEUSER.  If a valid entry was replaced, the stale
+ * translation is invalidated.
+ */
+void
+putmmu(uintptr va, uintptr pa, Page *)
+{
+	uintptr *pte, old;
+	int x;
+
+	x = splhi();
+	pte = mmuwalk(m->pml4, va, 0, 1);
+	if(pte == 0){
+		panic("putmmu: bug: va=%#p pa=%#p", va, pa);
+		return;
+	}
+	old = *pte;
+	*pte = pa | PTEVALID|PTEUSER;
+	splx(x);
+	if(old & PTEVALID)
+		invlpg(va);
+}
+
+/*
+ * No checking is done here; the arguments are only marked used.
+ */
+void
+checkmmu(uintptr va, uintptr pa)
+{
+	USED(va, pa);
+}
+
+/*
+ * Return how many bytes starting at physical address pa are
+ * reachable through the KZERO window; 0 if pa is outside it.
+ */
+uintptr
+cankaddr(uintptr pa)
+{
+	uintptr top;
+
+	top = -KZERO;
+	if(pa < top)
+		return top - pa;
+	return 0;
+}
+
+/*
+ * Nothing is counted here; the arguments are only marked used.
+ */
+void
+countpagerefs(ulong *ref, int print)
+{
+	USED(ref, print);
+}
+
+/*
+ * Make page addressable by the kernel and return a handle for
+ * it.  Pages inside the KZERO window are returned directly;
+ * others get the next slot of the KMAP area in this processor's
+ * page tables.  Panics if that slot is still in use.
+ */
+KMap*
+kmap(Page *page)
+{
+	uintptr *pte, pa, va;
+	int x;
+
+	pa = page->pa;
+	if(cankaddr(pa) != 0)
+		return (KMap*)KADDR(pa);
+
+	x = splhi();
+	va = KMAP + ((uintptr)m->kmapindex << PGSHIFT);
+	pte = mmuwalk(m->pml4, va, 0, 1);
+	if(pte == 0 || *pte & PTEVALID)
+		panic("kmap: pa=%#p va=%#p", pa, va);
+	*pte = pa | PTEWRITE|PTEVALID;
+	/* slots are used round-robin; the TLB is flushed on wrap-around */
+	m->kmapindex = (m->kmapindex + 1) % (1<<PTSHIFT);
+	if(m->kmapindex == 0)
+		mmuflushtlb();
+	splx(x);
+	return (KMap*)va;
+}
+
+/*
+ * Release a handle returned by kmap.  Handles at or above KZERO
+ * need no work; for the others the page table entry is cleared.
+ * Panics if the entry is missing or not valid.
+ */
+void
+kunmap(KMap *k)
+{
+	uintptr va, *pte;
+	int s;
+
+	va = (uintptr)k;
+	if(va < KZERO){
+		s = splhi();
+		pte = mmuwalk(m->pml4, va, 0, 0);
+		if(pte == 0 || (*pte & PTEVALID) == 0)
+			panic("kunmap: va=%#p", va);
+		*pte = 0;
+		splx(s);
+	}
+}
+
+/*
+ * Add a device mapping to the vmap range.
+ * Maps size bytes at physical address pa uncached and writable
+ * and returns the virtual address corresponding to pa.  The
+ * KZERO window is used when the whole range fits in it, the
+ * VMAP window otherwise.  Panics if size <= 0 or pa has bits
+ * set above the low 32.
+ */
+void*
+vmap(uintptr pa, int size)
+{
+	uintptr va;
+	int o;
+
+	if(size <= 0 || pa & ~0xffffffffull)
+		panic("vmap: pa=%#p size=%d pc=%#p", pa, size, getcallerpc(&pa));
+	if(cankaddr(pa) >= size)
+		va = pa+KZERO;
+	else
+		va = pa+VMAP;
+	/*
+	 * might be asking for less than a page.
+	 * map from the start of the page and add the offset back
+	 * to the returned address.
+	 */
+	o = pa & (BY2PG-1);
+	pa -= o;
+	va -= o;
+	size += o;
+	pmap(m->pml4, pa | PTEUNCACHED|PTEWRITE|PTEVALID, va, size);
+	return (void*)(va+o);
+}
+
+/*
+ * Nothing is unmapped; the address is only validated.
+ */
+void
+vunmap(void *v, int)
+{
+	paddr(v);	/* will panic on error */
+}
+
+/*
+ * vmapsync() is currently unused as the VMAP and KZERO PDPs
+ * are shared between processors. (see mpstartap)
+ *
+ * Copy processor 0's mapping for va into this processor's page
+ * tables, trying level 0 and then level 1.  Returns 1 if a valid
+ * entry was found on processor 0, 0 otherwise (including when
+ * va is below VMAP or this is processor 0).
+ */
+int
+vmapsync(uintptr va)
+{
+	uintptr *pte1, *pte2;
+	int level;
+
+	if(va < VMAP || m->machno == 0)
+		return 0;
+
+	for(level=0; level<2; level++){
+		pte1 = mmuwalk(MACHP(0)->pml4, va, level, 0);
+		if(pte1 && *pte1 & PTEVALID){
+			pte2 = mmuwalk(m->pml4, va, level, 1);
+			if(pte2 == 0)
+				break;
+			/* same entry when the tables are shared: nothing to copy */
+			if(pte1 != pte2)
+				*pte2 = *pte1;
+			return 1;
+		}
+	}
+	return 0;
+}
--- /dev/null
+++ b/sys/src/9/pc64/pc64
@@ -1,0 +1,153 @@
+# pc64 - pc terminal with local disk (amd64)
+dev
+	root
+	cons
+	arch
+	pnp		pci
+	env
+	pipe
+	proc
+	mnt
+	srv
+	shr
+	dup
+	rtc
+	ssl
+	tls
+	cap
+	kprof
+	fs
+
+	ether		netif
+	ip		arp chandial ip ipv6 ipaux iproute netlog ethermedium nullmedium pktmedium inferno
+
+	draw		screen vga vgax swcursor
+	mouse		mouse
+	kbd
+	vga
+
+	sd
+#	floppy		dma
+#	aoe
+#	lpt
+
+	audio		dma
+#	pccard
+#	i82365		cis
+	uart
+	usb
+
+link
+#	segdesc
+#	devpccard
+#	devi82365
+#	cputemp
+#	apm		apmjump
+#	ether2000	ether8390
+#	ether2114x	pci
+#	ether589	etherelnk3
+#	ether79c970	pci
+#	ether8003	ether8390
+#	ether8139	pci
+#	ether8169	pci ethermii
+# should be obsoleted by igbe
+#	ether82543gc	pci
+#	ether82557	pci
+	ether82563	pci
+#	ether82598	pci
+#	ether83815	pci
+#	etherbcm        pci
+#	etherdp83820	pci
+#	etherec2t	ether8390
+#	etherelnk3	pci
+#	etherga620	pci
+#	etherigbe	pci ethermii
+#	ethervgbe	pci ethermii
+#	ethervt6102	pci ethermii
+#	ethervt6105m	pci ethermii
+#	ethersink
+#	ethersmc	devi82365 cis
+#	etheryuk	pci
+#	etherwavelan	wavelan devi82365 cis pci
+	etheriwl	pci wifi
+#	etherrt2860	pci wifi
+	ethermedium
+#	pcmciamodem
+	netdevmedium
+	loopbackmedium
+	usbuhci
+#	usbohci
+	usbehci		usbehcipc
+
+#	audiosb16	dma
+#	audioac97	audioac97mix
+	audiohda
+
+misc
+	archacpi	mp apic squidboy
+	archmp		mp apic squidboy
+	mtrr
+
+#	sdaoe
+#	sdide		pci sdscsi
+#	sd53c8xx	pci sdscsi
+#	sdmylex		pci sdscsi
+#	sdiahci		pci sdscsi led
+#	sdodin		pci sdscsi led
+#	sdvirtio	pci sdscsi
+#	sdmmc		pci pmmc
+#	sdloop
+
+#	uarti8250
+#	uartisa
+#	uartpci		pci
+
+#	vga3dfx		+cur
+#	vgaark2000pv	+cur
+#	vgabt485	=cur
+#	vgaclgd542x	+cur
+#	vgaclgd546x	+cur
+#	vgact65545	+cur
+#	vgacyber938x	+cur
+#	vgaet4000	+cur
+#	vgageode	+cur
+#	vgahiqvideo	+cur
+#	vgai81x		+cur
+#	vgamach64xx	+cur
+#	vgamga2164w	+cur
+#	vgamga4xx	+cur
+#	vganeomagic	+cur
+#	vganvidia	+cur
+#	vgaradeon	+cur
+#	vgargb524	=cur
+#	vgas3		+cur vgasavage
+#	vgat2r4		+cur
+#	vgatvp3020	=cur
+#	vgatvp3026	=cur
+	vgavesa
+#	vgavmware	+cur
+
+ip
+	tcp
+	udp
+	rudp
+	ipifc
+	icmp
+	icmp6
+	gre
+	ipmux
+	esp
+	il
+
+port
+	int cpuserver = 0;
+
+boot boot
+	tcp
+	local
+
+bootdir
+	boot$CONF.out	boot
+	/$objtype/bin/paqfs
+	/$objtype/bin/auth/factotum
+	bootfs.paq
--- /dev/null
+++ b/sys/src/9/pc64/squidboy.c
@@ -1,0 +1,113 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+
+#include "mp.h"
+
+extern void checkmtrr(void);
+
+/*
+ * Entry point for an application processor; mpstartap stores
+ * its address in the bootstrap area.  Initialises the
+ * processor, marks it online and active, waits for
+ * active.thunderbirdsarego and then enters the scheduler.
+ */
+static void
+squidboy(Apic* apic)
+{
+	machinit();
+	mmuinit();
+	cpuidentify();
+	cpuidprint();
+	checkmtrr();
+	/* mpstartap polls this flag */
+	apic->online = 1;
+	coherence();
+
+	lapicinit(apic);
+	lapiconline();
+	syncclock();
+	timersinit();
+
+	lock(&active);
+	active.machs |= 1<<m->machno;
+	unlock(&active);
+
+	while(!active.thunderbirdsarego)
+		microdelay(100);
+
+	schedinit();
+}
+
+/*
+ * Start the application processor described by apic:
+ * allocate and initialise its PML4, a low PDP, GDT page and Mach
+ * structure, fill in the parameter slots of the bootstrap code at
+ * APBOOTSTRAP, set the warm-reset vector and send the startup
+ * request.  Waits up to 1000 polls, 10 apart by delay(), for
+ * the processor to set apic->online.
+ */
+void
+mpstartap(Apic* apic)
+{
+	uintptr *apbootp, *pml4, *pdp0;
+	Segdesc *gdt;
+	Mach *mach;
+	uchar *p;
+	int i;
+
+	/*
+	 * Initialise the AP page-tables and Mach structure.
+	 * Xspanalloc will panic if an allocation can't be made.
+	 */
+	/* one allocation, carved up in order: pml4, pdp0, gdt, mach */
+	p = xspanalloc(2*PTSZ + BY2PG + MACHSIZE, BY2PG, 0);
+	pml4 = (uintptr*)p;
+	p += PTSZ;
+	pdp0 = (uintptr*)p;
+	p += PTSZ;
+	gdt = (Segdesc*)p;
+	p += BY2PG;
+	mach = (Mach*)p;
+
+	memset(pml4, 0, PTSZ);
+	memset(pdp0, 0, PTSZ);
+	memset(gdt, 0, BY2PG);
+	memset(mach, 0, MACHSIZE);
+
+	mach->machno = apic->machno;
+	mach->pml4 = pml4;
+	mach->gdt  = gdt;	/* filled by mmuinit */
+	MACHP(mach->machno) = mach;
+
+	/*
+	 * map KZERO (note that we share the KZERO (and VMAP)
+	 * PDP between processors.
+	 */
+	pml4[PTLX(KZERO, 3)] = MACHP(0)->pml4[PTLX(KZERO, 3)];
+
+	/* double map */
+	/* the same entry that maps KZERO is also installed at address 0 */
+	pml4[0] = PADDR(pdp0) | PTEWRITE|PTEVALID;
+	pdp0[0] = *mmuwalk(pml4, KZERO, 2, 0);
+
+	/*
+	 * Tell the AP where its kernel vector and pdb are.
+	 * The offsets are known in the AP bootstrap code.
+	 */
+	apbootp = (uintptr*)(APBOOTSTRAP+0x08);
+	apbootp[0] = (uintptr)squidboy;	/* assembler jumps here eventually */
+	apbootp[1] = (uintptr)PADDR(pml4);
+	apbootp[2] = (uintptr)apic;
+	apbootp[3] = (uintptr)mach;
+
+	/*
+	 * Universal Startup Algorithm.
+	 */
+	/* four bytes are written: 16-bit offset, then 16-bit segment */
+	p = KADDR(0x467);		/* warm-reset vector */
+	*p++ = PADDR(APBOOTSTRAP);
+	*p++ = PADDR(APBOOTSTRAP)>>8;
+	i = (PADDR(APBOOTSTRAP) & ~0xFFFF)/16;
+	/* code assumes i==0 */
+	if(i != 0)
+		print("mp: bad APBOOTSTRAP\n");
+	*p++ = i;
+	*p = i>>8;
+	coherence();
+
+	nvramwrite(0x0F, 0x0A);		/* shutdown code: warm reset upon init ipi */
+	lapicstartap(apic, PADDR(APBOOTSTRAP));
+	for(i = 0; i < 1000; i++){
+		if(apic->online)
+			break;
+		delay(10);
+	}
+	/* restore the shutdown code whether or not the AP came up */
+	nvramwrite(0x0F, 0x00);
+}
--- /dev/null
+++ b/sys/src/9/pc64/trap.c
@@ -1,0 +1,1065 @@
+#include	"u.h"
+#include	"tos.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"ureg.h"
+#include	"../port/error.h"
+#include	<trace.h>
+
+static int trapinited;			/* set at the end of trapinit(); trap() panics while it is 0 */
+
+void	noted(Ureg*, ulong);
+
+static void debugbpt(Ureg*, void*);
+static void fault386(Ureg*, void*);
+static void doublefault(Ureg*, void*);
+static void unexpected(Ureg*, void*);
+static void _dumpstack(Ureg*);
+
+static Lock vctllock;			/* taken with ilock while vctl[] chains are changed */
+static Vctl *vctl[256];			/* handler chains, indexed by vector number */
+
+enum
+{
+	Ntimevec = 20		/* number of time buckets for each intr */
+};
+ulong intrtimes[256][Ntimevec];		/* per-vector histogram filled in by intrtime() */
+
+void
+intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
+{
+	int vno;
+	Vctl *v;
+	/* register f(ureg, a) as an interrupt handler; arch->intrenable chooses the vector */
+	if(f == nil){
+		print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
+			irq, tbdf, name);
+		return;
+	}
+
+	if(tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0)){
+		print("intrenable: got unassigned irq %d, tbdf 0x%uX for %s\n",
+			irq, tbdf, name);
+		irq = -1;	/* record the irq as unassigned */
+	}
+
+	if((v = xalloc(sizeof(Vctl))) == nil)
+		panic("intrenable: out of memory");
+	v->isintr = 1;	/* trap() counts and times entries with isintr set */
+	v->irq = irq;
+	v->tbdf = tbdf;
+	v->f = f;
+	v->a = a;
+	strncpy(v->name, name, KNAMELEN-1);
+	v->name[KNAMELEN-1] = 0;	/* strncpy may leave the name unterminated */
+
+	ilock(&vctllock);
+	vno = arch->intrenable(v);	/* vector number, or -1 on failure */
+	if(vno == -1){
+		iunlock(&vctllock);
+		print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
+			irq, tbdf, v->name);
+		xfree(v);
+		return;
+	}
+	if(vctl[vno]){
+		/* handlers sharing a vector must agree on isr and eoi */
+		if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
+			panic("intrenable: handler: %s %s %#p %#p %#p %#p",
+				vctl[vno]->name, v->name,
+				vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
+		v->next = vctl[vno];	/* push on the front of the chain */
+	}
+	vctl[vno] = v;
+	iunlock(&vctllock);
+}
+
+int
+intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
+{
+	Vctl **pv, *v;
+	int vno;
+
+	/*
+	 * For now, none of this will work with the APIC code,
+	 * there is no mapping between irq and vector as the IRQ
+	 * is pretty meaningless.
+	 */
+	if(arch->intrvecno == nil)
+		return -1;	/* no irq-to-vector mapping available */
+	vno = arch->intrvecno(irq);
+	ilock(&vctllock);
+	pv = &vctl[vno];
+	/* find the entry matching all of irq, tbdf, f, a and name */
+	while (*pv &&
+		  ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
+		   strcmp((*pv)->name, name)))
+		pv = &((*pv)->next);
+	assert(*pv);	/* the caller must have enabled exactly this handler */
+
+	v = *pv;
+	*pv = (*pv)->next;	/* Link out the entry */
+
+	if(vctl[vno] == nil && arch->intrdisable != nil)
+		arch->intrdisable(irq);	/* last handler on this vector is gone */
+	iunlock(&vctllock);
+	xfree(v);
+	return 0;
+}
+
+static long
+irqallocread(Chan*, void *vbuf, long n, vlong offset)
+{
+	char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
+	int m, vno;
+	long oldn;
+	Vctl *v;
+	/* read routine of the "irqalloc" arch file: one "vno irq name" line per Vctl */
+	if(n < 0 || offset < 0)
+		error(Ebadarg);
+
+	oldn = n;
+	buf = vbuf;
+	for(vno=0; vno<nelem(vctl); vno++){
+		for(v=vctl[vno]; v; v=v->next){
+			m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
+			if(m <= offset)	/* if do not want this, skip entry */
+				offset -= m;
+			else{
+				/* skip offset bytes */
+				m -= offset;
+				p = str+offset;
+				offset = 0;
+
+				/* write at most min(n,m) bytes */
+				if(m > n)
+					m = n;
+				memmove(buf, p, m);
+				n -= m;
+				buf += m;
+
+				if(n == 0)
+					return oldn;	/* caller's buffer is full */
+			}
+		}
+	}
+	return oldn - n;	/* number of bytes actually copied */
+}
+
+void
+trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
+{
+	Vctl *v;
+	/* install f(ureg, a) as handler for trap vector vno, which must be below VectorPIC */
+	if(vno < 0 || vno >= VectorPIC)
+		panic("trapenable: vno %d", vno);
+	if((v = xalloc(sizeof(Vctl))) == nil)
+		panic("trapenable: out of memory");
+	v->tbdf = BUSUNKNOWN;
+	v->f = f;
+	v->a = a;
+	strncpy(v->name, name, KNAMELEN-1);
+	v->name[KNAMELEN-1] = 0;	/* strncpy may leave the name unterminated */
+
+	ilock(&vctllock);
+	if(vctl[vno])	/* NOTE(review): the old head is unlinked, not chained - confirm intended */
+		v->next = vctl[vno]->next;
+	vctl[vno] = v;
+	iunlock(&vctllock);
+}
+
+static void
+nmienable(void)
+{
+	int x;
+
+	/*
+	 * Hack: should be locked with NVRAM access.
+	 */
+	outb(0x70, 0x80);		/* NMI latch clear */
+	outb(0x70, 0);
+
+	x = inb(0x61) & 0x07;		/* Enable NMI */
+	outb(0x61, 0x08|x);		/* write bit 3 set, then clear, keeping bits 0-2 */
+	outb(0x61, x);
+}
+
+void
+trapinit0(void)
+{
+	u32int d1, v;
+	uintptr vaddr;
+	Segdesc *idt;
+	/* fill the 256-entry table at IDTADDR; each entry occupies two Segdesc slots */
+	idt = (Segdesc*)IDTADDR;
+	vaddr = (uintptr)vectortable;
+	for(v = 0; v < 256; v++){
+		d1 = (vaddr & 0xFFFF0000)|SEGP;	/* handler address bits 16-31 */
+		switch(v){
+
+		case VectorBPT:
+			d1 |= SEGPL(3)|SEGIG;	/* presumably privilege level 3 so user code may raise it - confirm */
+			break;
+
+		case VectorSYSCALL:
+			d1 |= SEGPL(3)|SEGIG;
+			break;
+
+		default:
+			d1 |= SEGPL(0)|SEGIG;
+			break;
+		}
+
+		idt->d0 = (vaddr & 0xFFFF)|(KESEL<<16);	/* address bits 0-15 and the KESEL selector */
+		idt->d1 = d1;
+		idt++;
+
+		idt->d0 = (vaddr >> 32);	/* upper 32 bits of the handler address */
+		idt->d1 = 0;
+		idt++;
+
+		vaddr += 6;	/* consecutive vectortable entries are 6 bytes apart */
+	}
+}
+
+void
+trapinit(void)
+{
+	/*
+	 * Special traps.
+	 * Syscall() is called directly without going through trap().
+	 */
+	trapenable(VectorBPT, debugbpt, 0, "debugpt");
+	trapenable(VectorPF, fault386, 0, "fault386");
+	trapenable(Vector2F, doublefault, 0, "doublefault");
+	trapenable(Vector15, unexpected, 0, "unexpected");
+	nmienable();
+	addarchfile("irqalloc", 0444, irqallocread, nil);	/* read-only file served by irqallocread */
+	trapinited = 1;	/* from here on trap() dispatches instead of panicking */
+}
+
+static char* excname[32] = {	/* exception names, indexed by vector number; used by trap() */
+	"divide error",
+	"debug exception",
+	"nonmaskable interrupt",
+	"breakpoint",
+	"overflow",
+	"bounds check",
+	"invalid opcode",
+	"coprocessor not available",
+	"double fault",
+	"coprocessor segment overrun",
+	"invalid TSS",
+	"segment not present",
+	"stack exception",
+	"general protection violation",
+	"page fault",
+	"15 (reserved)",
+	"coprocessor error",
+	"alignment check",
+	"machine check",
+	"19 (reserved)",
+	"20 (reserved)",
+	"21 (reserved)",
+	"22 (reserved)",
+	"23 (reserved)",
+	"24 (reserved)",
+	"25 (reserved)",
+	"26 (reserved)",
+	"27 (reserved)",
+	"28 (reserved)",
+	"29 (reserved)",
+	"30 (reserved)",
+	"31 (reserved)",
+};
+
+/*
+ *  keep histogram of interrupt service times
+ */
+void
+intrtime(Mach*, int vno)
+{
+	ulong diff;
+	ulong x;
+
+	x = perfticks();
+	diff = x - m->perf.intrts;	/* ticks since trap() stamped intrts */
+	m->perf.intrts = x;
+
+	m->perf.inintr += diff;
+	if(up == nil && m->perf.inidle > diff)
+		m->perf.inidle -= diff;	/* no current process: take the time back out of inidle */
+
+	diff /= m->cpumhz*100;		/* quantum = 100µsec */
+	if(diff >= Ntimevec)
+		diff = Ntimevec-1;	/* clamp into the last bucket */
+	intrtimes[vno][diff]++;
+}
+
+/* go to user space */
+void
+kexit(Ureg*)
+{
+	uvlong t;
+	Tos *tos;
+
+	/* precise time accounting, kernel exit */
+	tos = (Tos*)((uintptr)USTKTOP-sizeof(Tos));	/* the Tos sits just below USTKTOP */
+	cycles(&t);
+	tos->kcycles += t - up->kentry;	/* kentry was stamped on entry by trap() or syscall() */
+	tos->pcycles = t + up->pcycles;
+	tos->pid = up->pid;
+}
+
+void
+display(char *s)
+{
+	char *cell;
+
+	cell = (char*)KADDR(0xB8000);	/* presumably the text-mode screen buffer - confirm */
+	for(; *s != 0; s++){
+		*cell = *s;	/* one character per two-byte cell */
+		cell += 2;
+	}
+}
+
+void
+trap(Ureg *ureg)
+{
+	int clockintr, i, vno, user;
+	char buf[ERRMAX];
+	Vctl *ctl, *v;
+	Mach *mach;
+	/* dispatch the vector in ureg->type through vctl[], or diagnose it if nothing is registered */
+	if(!trapinited){
+		/* fault386 can give a better error message */
+		if(ureg->type == VectorPF)
+			fault386(ureg, nil);
+		panic("trap %llud: not ready", ureg->type);
+	}
+
+	m->perf.intrts = perfticks();	/* start stamp read back by intrtime() */
+	user = userureg(ureg);
+	if(user){
+		up->dbgreg = ureg;
+		cycles(&up->kentry);	/* kernel entry time, consumed by kexit() */
+	}
+
+	clockintr = 0;
+
+	vno = ureg->type;
+
+	if(ctl = vctl[vno]){
+		if(ctl->isintr){
+			m->intr++;
+			if(vno >= VectorPIC)
+				m->lastintr = ctl->irq;
+		}
+		if(ctl->isr)
+			ctl->isr(vno);
+		for(v = ctl; v != nil; v = v->next){	/* run every handler chained on this vector */
+			if(v->f)
+				v->f(ureg, v->a);
+		}
+		if(ctl->eoi)
+			ctl->eoi(vno);
+
+		if(ctl->isintr){
+			intrtime(m, vno);
+
+			if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
+				clockintr = 1;
+
+			if(up && !clockintr)
+				preempted();
+		}
+	}
+	else if(vno < nelem(excname) && user){	/* unhandled exception from user mode: post a note */
+		spllo();
+		sprint(buf, "sys: trap: %s", excname[vno]);
+		dumpregs(ureg);
+		postnote(up, 1, buf, NDebug);
+	}
+	else if(vno >= VectorPIC){
+		/*
+		 * An unknown interrupt.
+		 * Check for a default IRQ7. This can happen when
+		 * the IRQ input goes away before the acknowledge.
+		 * In this case, a 'default IRQ7' is generated, but
+		 * the corresponding bit in the ISR isn't set.
+		 * In fact, just ignore all such interrupts.
+		 */
+
+		/* call all interrupt routines, just in case */
+		for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
+			ctl = vctl[i];
+			if(ctl == nil)
+				continue;
+			if(!ctl->isintr)
+				continue;
+			for(v = ctl; v != nil; v = v->next){
+				if(v->f)
+					v->f(ureg, v->a);
+			}
+			/* should we do this? */
+			if(ctl->eoi)
+				ctl->eoi(i);
+		}
+
+		/* clear the interrupt */
+		i8259isr(vno);
+
+		if(0)print("cpu%d: spurious interrupt %d, last %d\n",
+			m->machno, vno, m->lastintr);
+		if(0)if(conf.nmach > 1){
+			for(i = 0; i < 32; i++){
+				if(!(active.machs & (1<<i)))
+					continue;
+				mach = MACHP(i);
+				if(m->machno == mach->machno)
+					continue;
+				print(" cpu%d: last %d",
+					mach->machno, mach->lastintr);
+			}
+			print("\n");
+		}
+		m->spuriousintr++;
+		if(user)
+			kexit(ureg);
+		return;	/* spurious interrupts skip the scheduling and note work below */
+	}
+	else{
+		if(vno == VectorNMI){
+			/*
+			 * Don't re-enable, it confuses the crash dumps.
+			nmienable();
+			 */
+			iprint("cpu%d: PC %#p\n", m->machno, ureg->pc);
+			while(m->machno != 0)	/* every cpu except 0 spins here forever */
+				;
+		}
+
+		if(!user){
+			void (*pc)(void);
+
+			extern void _rdmsrinst(void);
+			extern void _wrmsrinst(void);
+
+			pc = (void*)ureg->pc;
+			if(pc == _rdmsrinst || pc == _wrmsrinst){
+				if(vno == VectorGPF){
+					ureg->bp = -1;	/* flag the failed MSR access in bp */
+					ureg->pc += 2;	/* and resume 2 bytes past the faulting pc */
+					return;
+				}
+			}
+		}
+
+		dumpregs(ureg);
+		if(!user){
+			ureg->sp = (uintptr)&ureg->sp;
+			_dumpstack(ureg);
+		}
+		if(vno < nelem(excname))
+			panic("%s", excname[vno]);
+		panic("unknown trap/intr: %d", vno);
+	}
+	splhi();
+
+	/* delaysched set because we held a lock or because our quantum ended */
+	if(up && up->delaysched && clockintr){
+		sched();
+		splhi();
+	}
+
+	if(user){
+		if(up->procctl || up->nnote)
+			notify(ureg);
+		kexit(ureg);
+	}
+}
+
+void
+dumpregs(Ureg* ureg)
+{
+	/* print the saved registers in ureg and the control registers on the console */
+	if(up)
+		iprint("cpu%d: registers for %s %lud\n",
+			m->machno, up->text, up->pid);
+	else
+		iprint("cpu%d: registers for kernel\n", m->machno);
+	/* end this line with a newline so the AX/BX line below starts on its own row */
+	iprint("FLAGS=%#p TYPE=%#p ERROR=%#p PC=%#p SP=%#p\n",
+		ureg->flags, ureg->type, ureg->error, ureg->pc, ureg->sp);
+	iprint("  AX %#p  BX %#p  CX %#p  DX %#p\n",
+		ureg->ax, ureg->bx, ureg->cx, ureg->dx);
+	iprint("  SI %#p  DI %#p  BP %#p\n",
+		ureg->si, ureg->di, ureg->bp);
+	iprint("  CS %4.4lluX  DS %4.4uX  ES %4.4uX  FS %4.4uX  GS %4.4uX\n",
+		ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
+		ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
+	/*
+	 * Processor control registers.
+	 * If machine check exception, time stamp counter, page size extensions
+	 * or enhanced virtual 8086 mode extensions are supported, there is a
+	 * CR4. If there is a CR4 and machine check extensions, read the machine
+	 * check address and machine check type registers if RDMSR supported.
+	 */
+	iprint("  CR0 %8.8llux CR2 %16.16llux CR3 %16.16llux",
+		getcr0(), getcr2(), getcr3());
+	if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
+		iprint(" CR4 %16.16llux", getcr4());
+		if((m->cpuiddx & (Mce|Cpumsr)) == (Mce|Cpumsr)){
+			vlong mca, mct;
+
+			rdmsr(0x00, &mca);	/* MSR 0: machine check address */
+			rdmsr(0x01, &mct);	/* MSR 1: machine check type */
+			iprint("\n  MCA %8.8llux MCT %8.8llux", mca, mct);
+		}
+	}
+	iprint("\n  ur %#p up %#p\n", ureg, up);
+}
+
+/*
+ * Fill in enough of a zeroed Ureg to get a stack trace, and call a
+ * function.  Used by debugging interface rdb.
+ */
+void
+callwithureg(void (*fn)(Ureg*))
+{
+	Ureg ureg;
+	memset(&ureg, 0, sizeof ureg);	/* fn may read other fields, e.g. _dumpstack tests type */
+	ureg.pc = getcallerpc(&fn);
+	ureg.sp = (uintptr)&fn;
+	fn(&ureg);
+}
+
+static void
+_dumpstack(Ureg *ureg)
+{
+	uintptr l, v, i, estack;
+	extern ulong etext;
+	int x;	/* sums iprint return values; never read in this function */
+	char *s;
+	/* print the current kernel stack in the form of a "ktrace" command */
+	if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
+		iprint("dumpstack disabled\n");
+		return;
+	}
+	iprint("dumpstack\n");
+
+	x = 0;
+	x += iprint("ktrace /kernel/path %#p %#p <<EOF\n", ureg->pc, ureg->sp);
+	i = 0;
+	/* find the end of whichever stack the local l lives on: process kstack or Mach stack */
+	if(up
+	&& (uintptr)&l >= (uintptr)up->kstack
+	&& (uintptr)&l <= (uintptr)up->kstack+KSTACK)
+		estack = (uintptr)up->kstack+KSTACK;
+	else if((uintptr)&l >= (uintptr)m->stack
+	&& (uintptr)&l <= (uintptr)m+MACHSIZE)
+		estack = (uintptr)m+MACHSIZE;
+	else
+		return;
+	x += iprint("estackx %p\n", estack);
+
+	for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
+		v = *(uintptr*)l;
+		if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
+			/*
+			 * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
+			 * and CALL indirect through AX
+			 * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
+			 * but this is too clever and misses faulting address.
+			 */
+			x += iprint("%.8p=%.8p ", l, v);
+			i++;
+		}
+		if(i == 4){	/* four address=value pairs per output line */
+			i = 0;
+			x += iprint("\n");
+		}
+	}
+	if(i)
+		iprint("\n");
+	iprint("EOF\n");
+
+	if(ureg->type != VectorNMI)
+		return;
+	/* for an NMI, also dump every stack word, eight per line */
+	i = 0;
+	for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
+		iprint("%.8p ", *(uintptr*)l);
+		if(++i == 8){
+			i = 0;
+			iprint("\n");
+		}
+	}
+	if(i)
+		iprint("\n");
+}
+
+void
+dumpstack(void)
+{
+	callwithureg(_dumpstack);	/* _dumpstack gets a Ureg whose pc and sp describe this call */
+}
+
+static void
+debugbpt(Ureg* ureg, void*)
+{
+	char buf[ERRMAX];
+	/* VectorBPT handler: turn a breakpoint trap into a "sys: breakpoint" note */
+	if(up == 0)
+		panic("kernel bpt");	/* no current process to post the note to */
+	/* restore pc to instruction that caused the trap */
+	ureg->pc--;
+	sprint(buf, "sys: breakpoint");
+	postnote(up, 1, buf, NDebug);
+}
+
+static void
+doublefault(Ureg*, void*)
+{
+	panic("double fault");	/* handler trapinit() registers on Vector2F; never returns normally */
+}
+
+static void
+unexpected(Ureg* ureg, void*)
+{
+	print("unexpected trap %llud; ignoring\n", ureg->type);	/* registered on Vector15: log and carry on */
+}
+
+extern void checkpages(void);
+static void
+fault386(Ureg* ureg, void*)
+{
+	uintptr addr;
+	int read, user, n, insyscall;
+	char buf[ERRMAX];
+	/* page-fault handler, registered on VectorPF by trapinit() */
+	addr = getcr2();	/* the faulting address is taken from CR2 */
+	read = !(ureg->error & 2);	/* error-code bit 1 clear is treated as a read */
+	user = userureg(ureg);
+	if(!user){
+		if(vmapsync(addr))
+			return;	/* vmapsync reported the fault as handled */
+		if(addr >= USTKTOP)
+			panic("kernel fault: bad address pc=%#p addr=%#p", ureg->pc, addr);
+		if(up == nil)
+			panic("kernel fault: no user process pc=%#p addr=%#p", ureg->pc, addr);
+	}
+	if(up == nil)
+		panic("user fault: up=0 pc=%#p addr=%#p", ureg->pc, addr);
+
+	insyscall = up->insyscall;	/* saved here, restored on the way out */
+	up->insyscall = 1;
+	n = fault(addr, read);
+	if(n < 0){
+		dumpregs(ureg);
+		if(!user){
+			panic("fault: %#p", addr);
+		}
+		checkpages();
+		sprint(buf, "sys: trap: fault %s addr=%#p",
+			read ? "read" : "write", addr);
+		postnote(up, 1, buf, NDebug);
+	}
+	up->insyscall = insyscall;
+}
+
+/*
+ *  system calls
+ */
+#include "../port/systab.h"
+
+/*
+ *  Syscall is called directly from assembler without going through trap().
+ */
+void
+syscall(Ureg* ureg)
+{
+	char *e;
+	uintptr	sp;
+	long long ret;
+	int	i, s;
+	ulong scallnr;
+	vlong startns, stopns;
+	/* the call number arrives in ureg->ax and the result is written back there */
+	if(!userureg(ureg))
+		panic("syscall: cs 0x%4.4lluX", ureg->cs);
+
+	cycles(&up->kentry);	/* kernel entry time, consumed by kexit() */
+
+	m->syscall++;
+	up->insyscall = 1;
+	up->pc = ureg->pc;
+	up->dbgreg = ureg;
+
+	sp = ureg->sp;
+	scallnr = ureg->ax;
+	up->scallnr = scallnr;
+
+	spllo();
+	startns = 0;
+	up->nerrlab = 0;
+	ret = -1;	/* the value returned to the user when the call raises an error */
+	if(!waserror()){
+		/* validate explicitly unless the arguments lie within the top page of the user stack */
+		if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
+			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
+
+		up->s = *((Sargs*)(sp+BY2WD));	/* copy the arguments, which start one word above sp */
+		if(0){
+			syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
+			print("syscall: %s\n", up->syscalltrace);
+		}
+
+		if(up->procctl == Proc_tracesyscall){
+			syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
+			s = splhi();
+			up->procctl = Proc_stopme;
+			procctl(up);
+			splx(s);
+			startns = todget(nil);
+		}
+		if(scallnr >= nsyscall || systab[scallnr] == 0){
+			pprint("bad sys call number %lud pc %#p\n",
+				scallnr, ureg->pc);
+			postnote(up, 1, "sys: bad sys call", NDebug);
+			error(Ebadarg);
+		}
+		up->psstate = sysctab[scallnr];
+		ret = systab[scallnr]((va_list)up->s.args);
+		poperror();
+	}else{
+		/* failure: save the error buffer for errstr */
+		e = up->syserrstr;
+		up->syserrstr = up->errstr;
+		up->errstr = e;
+		if(0 && up->pid == 1)
+			print("syscall %lud error %s\n", scallnr, up->syserrstr);
+	}
+	if(up->nerrlab){	/* a handler left error labels on the stack */
+		print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
+		for(i = 0; i < NERR; i++)
+			print("sp=%#p pc=%#p\n",
+				up->errlab[i].sp, up->errlab[i].pc);
+		panic("error stack");
+	}
+
+	/*
+	 *  Put return value in frame.  On the x86 the syscall is
+	 *  just another trap and the return value from syscall is
+	 *  ignored.  On other machines the return value is put into
+	 *  the results register by caller of syscall.
+	 */
+	ureg->ax = ret;
+
+	if(0){
+		print("syscallret: %lud %s %s ret=%lld\n", 
+			up->pid, up->text, sysctab[scallnr], ret);
+	}
+
+	if(up->procctl == Proc_tracesyscall){
+		stopns = todget(nil);
+		sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
+		s = splhi();
+		up->procctl = Proc_stopme;
+		procctl(up);
+		splx(s);
+	}
+
+	up->insyscall = 0;
+	up->psstate = 0;
+
+	if(scallnr == NOTED)
+		noted(ureg, up->s.args[0]);
+
+	if(scallnr!=RFORK && (up->procctl || up->nnote)){
+		splhi();
+		notify(ureg);
+	}
+	/* if we delayed sched because we held a lock, sched now */
+	if(up->delaysched)
+		sched();
+	kexit(ureg);
+}
+
+/*
+ *  Call user, if necessary, with note.
+ *  Pass user the Ureg struct and the note on his stack.
+ */
+int
+notify(Ureg* ureg)
+{
+	int l, s;
+	uintptr sp;
+	Note *n;
+	/* returns 1 if ureg was redirected to the user's note handler, 0 otherwise */
+	if(up->procctl)
+		procctl(up);
+	if(up->nnote == 0)
+		return 0;
+
+	if(up->fpstate == FPactive){
+		fpsave(&up->fpsave);
+		up->fpstate = FPinactive;
+	}
+	up->fpstate |= FPillegal;	/* cleared again by noted() */
+
+	s = spllo();
+	qlock(&up->debug);
+	up->notepending = 0;
+	n = &up->note[0];
+	if(strncmp(n->msg, "sys:", 4) == 0){
+		l = strlen(n->msg);
+		/* pc is 64 bits wide here: leave room for 16 hex digits so sprint cannot overrun msg */
+		if(l > ERRMAX-23)	/* " pc=0x0123456789abcdef\0" */
+			l = ERRMAX-23;
+		sprint(n->msg+l, " pc=%#p", ureg->pc);
+	}
+
+	if(n->flag!=NUser && (up->notified || up->notify==0)){
+		qunlock(&up->debug);
+		if(n->flag == NDebug)
+			pprint("suicide: %s\n", n->msg);
+		pexit(n->msg, n->flag!=NDebug);
+	}
+
+	if(up->notified){	/* already inside a note handler: leave the note queued */
+		qunlock(&up->debug);
+		splhi();
+		return 0;
+	}
+
+	if(!up->notify){
+		qunlock(&up->debug);
+		pexit(n->msg, n->flag!=NDebug);
+	}
+	sp = ureg->sp;
+	sp -= 256;	/* debugging: preserve context causing problem */
+	sp -= sizeof(Ureg);
+if(0) print("%s %lud: notify %#p %#p %#p %s\n",
+	up->text, up->pid, ureg->pc, ureg->sp, sp, n->msg);
+
+	if(!okaddr((uintptr)up->notify, 1, 0)
+	|| !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
+		qunlock(&up->debug);
+		pprint("suicide: bad address in notify\n");
+		pexit("Suicide", 0);
+	}
+
+	memmove((Ureg*)sp, ureg, sizeof(Ureg));	/* copy of the interrupted registers for the handler */
+	*(Ureg**)(sp-BY2WD) = up->ureg;	/* word under Ureg is old up->ureg */
+	up->ureg = (void*)sp;
+	sp -= BY2WD+ERRMAX;
+	memmove((char*)sp, up->note[0].msg, ERRMAX);
+	sp -= 3*BY2WD;
+	*(uintptr*)(sp+2*BY2WD) = sp+3*BY2WD;		/* arg 2 is string */
+	*(uintptr*)(sp+1*BY2WD) = (uintptr)up->ureg;	/* arg 1 is ureg* */
+	*(uintptr*)(sp+0*BY2WD) = 0;			/* arg 0 is pc */
+	ureg->sp = sp;
+	ureg->pc = (uintptr)up->notify;	/* return to user mode in the note handler */
+	ureg->cs = UESEL;
+	ureg->ss = ureg->ds = ureg->es = UDSEL;
+	up->notified = 1;
+	up->nnote--;
+	memmove(&up->lastnote, &up->note[0], sizeof(Note));
+	memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));	/* shift the queue down */
+
+	qunlock(&up->debug);
+	splx(s);
+	return 1;
+
+}
+
+/*
+ *   Return user to state before notify()
+ */
+void
+noted(Ureg* ureg, ulong arg0)
+{
+	Ureg *nureg;
+	uintptr oureg, sp;
+
+	qlock(&up->debug);
+	if(arg0!=NRSTR && !up->notified) {
+		qunlock(&up->debug);
+		pprint("call to noted() when not notified\n");
+		pexit("Suicide", 0);
+	}
+	up->notified = 0;
+
+	nureg = up->ureg;	/* pointer to user returned Ureg struct */
+
+	up->fpstate &= ~FPillegal;	/* undo what notify() set */
+
+	/* sanity clause */
+	oureg = (uintptr)nureg;
+	if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
+		qunlock(&up->debug);
+		pprint("bad ureg in noted or call to noted when not notified\n");
+		pexit("Suicide", 0);
+	}
+
+	/* don't let user change system flags */
+	nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
+	nureg->cs |= 3;
+	nureg->ss |= 3;
+
+	memmove(ureg, nureg, sizeof(Ureg));
+
+	switch(arg0){
+	case NCONT:
+	case NRSTR:
+if(0) print("%s %lud: noted %#p %#p\n",
+	up->text, up->pid, nureg->pc, nureg->sp);
+		if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->sp, BY2WD, 0)){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		/* notify() stored a full pointer here; read it back at pointer width, not as a 32-bit ulong */
+		up->ureg = (Ureg*)(*(uintptr*)(oureg-BY2WD));
+		qunlock(&up->debug);
+		break;
+
+	case NSAVE:
+		if(!okaddr(nureg->pc, BY2WD, 0)
+		|| !okaddr(nureg->sp, BY2WD, 0)){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		qunlock(&up->debug);
+		sp = oureg-4*BY2WD-ERRMAX;
+		splhi();
+		ureg->sp = sp;
+		((uintptr*)sp)[1] = oureg;	/* arg 1 0(FP) is ureg* */
+		((uintptr*)sp)[0] = 0;		/* arg 0 is pc */
+		break;
+
+	default:
+		up->lastnote.flag = NDebug;
+		/* fall through */
+
+	case NDFLT:
+		qunlock(&up->debug);
+		if(up->lastnote.flag == NDebug)
+			pprint("suicide: %s\n", up->lastnote.msg);
+		pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
+	}
+}
+
+uintptr
+execregs(uintptr entry, ulong ssize, ulong nargs)
+{
+	uintptr *sp;
+	Ureg *ureg;
+	/* set the saved user registers in up->dbgreg to start at entry with a fresh stack */
+	sp = (uintptr*)(USTKTOP - ssize);
+	*--sp = nargs;	/* nargs goes in the word just below the ssize bytes */
+	ureg = up->dbgreg;
+	ureg->sp = (uintptr)sp;
+	ureg->pc = entry;
+	ureg->cs = UESEL;
+	ureg->ss = ureg->ds = ureg->es = UDSEL;
+	ureg->fs = ureg->gs = NULLSEL;
+	return (uintptr)USTKTOP-sizeof(Tos);		/* address of kernel/user shared data */
+}
+
+/*
+ *  return the pc recorded in up->dbgreg, i.e. the user pc
+ *  the last exception happened at
+ */
+uintptr
+userpc(void)
+{
+	Ureg *saved = (Ureg*)up->dbgreg;
+
+	return saved->pc;
+}
+
+/* This routine must save the values of registers the user is not permitted
+ * to write from devproc and then restore the saved values before returning.
+ */
+void
+setregisters(Ureg* ureg, char* pureg, char* uva, int n)
+{
+	u64int flags;
+
+	flags = ureg->flags;	/* remember the flags before the user's bytes are copied in */
+	memmove(pureg, uva, n);
+	ureg->cs = UESEL;	/* force the user code and data selectors */
+	ureg->ss = ureg->ds = ureg->es = UDSEL;
+	if(ureg->fs != UDSEL)
+		ureg->fs = NULLSEL;
+	if(ureg->gs != UDSEL)
+		ureg->gs = 0;
+	/* keep bits 0-7 of the new flags, take bits 8-15 from the saved ones; higher bits end up 0 */
+	ureg->flags = (ureg->flags & 0x00ff) | (flags & 0xff00);
+}
+
+static void
+linkproc(void)
+{
+	spllo();
+	up->kpfun(up->kparg);	/* run the function and argument recorded by kprocchild() */
+	pexit("kproc dying", 0);	/* reached only if kpfun returns */
+}
+
+void
+kprocchild(Proc* p, void (*func)(void*), void* arg)
+{
+	/*
+	 * gotolabel() needs a word on the stack in
+	 * which to place the return PC used to jump
+	 * to linkproc().
+	 */
+	p->sched.pc = (uintptr)linkproc;
+	p->sched.sp = (uintptr)p->kstack+KSTACK-BY2WD;	/* top of the kernel stack less that one word */
+
+	p->kpfun = func;	/* linkproc() calls kpfun(kparg) */
+	p->kparg = arg;
+}
+
+void
+forkchild(Proc *p, Ureg *ureg)
+{
+	Ureg *cureg;
+
+	/*
+	 * Add 2*BY2WD to the stack to account for
+	 *  - the return PC
+	 *  - trap's argument (ur)
+	 */
+	p->sched.sp = (uintptr)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
+	p->sched.pc = (uintptr)forkret;	/* the child is scheduled in at forkret */
+
+	cureg = (Ureg*)(p->sched.sp+2*BY2WD);	/* child's Ureg sits at the top of its kernel stack */
+	memmove(cureg, ureg, sizeof(Ureg));
+
+	cureg->ax = 0;	/* ax carries the syscall result: the child sees 0 */
+
+	/* Things from bottom of syscall which were never executed */
+	p->psstate = 0;
+	p->insyscall = 0;
+}
+
+/* Give enough context in the ureg to produce a kernel stack for
+ * a sleeping process
+ */
+void
+setkernur(Ureg* ureg, Proc* p)
+{
+	ureg->pc = p->sched.pc;
+	ureg->sp = p->sched.sp+8;	/* NOTE(review): +8 presumably skips the saved return-pc word - confirm */
+}
+
+uintptr
+dbgpc(Proc *p)
+{
+	Ureg *saved;
+
+	/* a process with no dbgreg reports a pc of 0 */
+	saved = p->dbgreg;
+	if(saved != 0)
+		return saved->pc;
+	return 0;
+}
--- a/sys/src/9/port/devcons.c
+++ b/sys/src/9/port/devcons.c
@@ -601,15 +601,15 @@
 
 	case Qswap:
 		snprint(tmp, sizeof tmp,
-			"%lud memory\n"
-			"%d pagesize\n"
+			"%llud memory\n"
+			"%llud pagesize\n"
 			"%lud kernel\n"
 			"%lud/%lud user\n"
 			"%lud/%lud swap\n"
 			"%lud/%lud kernel malloc\n"
 			"%lud/%lud kernel draw\n",
-			conf.npage*BY2PG,
-			BY2PG,
+			(uvlong)conf.npage*BY2PG,
+			(uvlong)BY2PG,
 			conf.npage-conf.upages,
 			palloc.user-palloc.freecount, palloc.user,
 			conf.nswap-swapalloc.free, conf.nswap,
--- a/sys/src/9/port/mkdevc
+++ b/sys/src/9/port/mkdevc
@@ -109,7 +109,7 @@
 		printf "\t%slink();\n", link[i];
 	printf "}\n\n";
 
-	if(narch || objtype == "386" || objtype == "amd64"){
+	if(narch || objtype ~ "(386|amd64)"){
 		for(i = 0; i < narch; i++)
 			printf "extern PCArch %s;\n", arch[i];
 		printf "PCArch* knownarch[] = {\n";
--- a/sys/src/9/port/mkdevlist
+++ b/sys/src/9/port/mkdevlist
@@ -40,7 +40,7 @@
 	x = ""
 	for(i in obj)
 		x = x i "\n";
-	if(objtype ~ "386" && obj["pci" "'.$O'"])
+	if((objtype ~ "386" || objtype ~ "amd64") && obj["pci" "'.$O'"])
 		x = x "bios32'.$O' \n";
 	printf x;
 }' $*