shithub: riscv

Download patch

ref: 77f3fa19de87be8ded57a038cab60b240b191dab
parent: 709e78b9f9a76658cc6c704176ebf8f5821ee9d2
author: aiju <devnull@localhost>
date: Sat Sep 27 16:50:20 EDT 2014

games/gba: slowly working (no pun intended)

--- /dev/null
+++ b/sys/src/games/gba/cpu.c
@@ -1,0 +1,1269 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include "dat.h"
+#include "fns.h"
+
+enum {
+	FLAGN = 1<<31,	/* cpsr condition flags tested by cond() */
+	FLAGZ = 1<<30,
+	FLAGC = 1<<29,
+	FLAGV = 1<<28,
+	FLAGT = 1<<5,	/* set: setcpsr selects stepthumb, clear: steparm */
+	FLAGI = 1<<7,	/* set: steparm/stepthumb ignore a pending irq */
+	
+	MUSR = 0x10,	/* values of the cpsr mode field (see MODE) */
+	MFIQ = 0x11,
+	MIRQ = 0x12,
+	MSVC = 0x13,
+	MABT = 0x17,
+	MUND = 0x1b,
+	MSYS = 0x1f,	/* shares the r13/r14 bank with MUSR in setcpsr */
+	MODE = 0x1f,	/* mask selecting the mode field of cpsr */
+	
+	R13USR = 0, R14USR, R13FIQ, R14FIQ,	/* indices into saver[]: banked r13/r14 per mode */
+	R13SVC, R14SVC, R13ABT, R14ABT,
+	R13IRQ, R14IRQ, R13UND, R14UND,
+	SPSRFIQ, SPSRSVC, SPSRABT, SPSRIRQ, SPSRUND,	/* indices into saver[]: banked spsr per mode */
+	R8USR, R9USR, R10USR, R11USR, R12USR,	/* indices into saver[]: r8-r12 while in MFIQ */
+	R8FIQ, R9FIQ, R10FIQ, R11FIQ, R12FIQ	/* indices into saver[]: r8-r12 while not in MFIQ */
+};
+u32int r[16], cpsr, spsr;	/* live register file and status registers of the current mode */
+u32int saver[R12FIQ+1];	/* registers of the inactive modes, swapped in and out by setcpsr */
+u32int curpc;	/* address of the instruction being executed; used in diagnostics */
+int irq;	/* nonzero: interrupt pending, tested at the start of every step */
+
+u32int instr0, instr1, pipel = -1;	/* instr0: newest fetch, instr1: next to execute; pipel is -1 right after a flush and is shifted left on every fetch */
+int cyc, trace;	/* cyc: cycle count of the current step; trace: nonzero prints every instruction */
+
+#define pipeflush() {io(); pipel = -1;}	/* charge one cycle and mark the pipeline as empty */
+#define io() cyc++	/* charge one extra cycle */
+
+static int steparm(void);
+static int stepthumb(void);
+int (*step)(void);	/* current instruction stepper, chosen by setcpsr from FLAGT */
+
+/*
+ * undefined aborts the emulator on an opcode it cannot decode.
+ * In Thumb state (FLAGT set) only the low 16 bits of instr are reported.
+ */
+void
+undefined(u32int instr)
+{
+	int thumb;
+
+	thumb = (cpsr & FLAGT) != 0;
+	if(!thumb)
+		sysfatal("undefined opcode %#.8ux (pc=%#.8ux)", instr, curpc);
+	sysfatal("undefined opcode %#.4ux (pc=%#.8ux)", (u16int)instr, curpc);
+}
+
+/*
+ * cond evaluates condition code n (0-14) against the flags in cpsr and
+ * returns nonzero when the condition holds.  Any other code is reported
+ * through undefined(instr).
+ *
+ * N == V is tested by shifting V (bit 28) up onto N (bit 31).  The Z test
+ * for codes 12 and 13 is done separately: folding it into the same xor
+ * would mix cpsr bit 27 into the result, and that bit can be set by an
+ * MSR that writes the flags byte.
+ */
+int
+cond(int n, u32int instr)
+{
+	switch(n){
+	case 0: return (cpsr & FLAGZ) != 0;
+	case 1: return (cpsr & FLAGZ) == 0;
+	case 2: return (cpsr & FLAGC) != 0;
+	case 3: return (cpsr & FLAGC) == 0;
+	case 4: return (cpsr & FLAGN) != 0;
+	case 5: return (cpsr & FLAGN) == 0;
+	case 6: return (cpsr & FLAGV) != 0;
+	case 7: return (cpsr & FLAGV) == 0;
+	case 8: return (cpsr & (FLAGC|FLAGZ)) == FLAGC;
+	case 9: return (cpsr & (FLAGC|FLAGZ)) != FLAGC;
+	case 10: return ((cpsr ^ cpsr << 3) & FLAGN) == 0;
+	case 11: return ((cpsr ^ cpsr << 3) & FLAGN) != 0;
+	case 12: return (cpsr & FLAGZ) == 0 && ((cpsr ^ cpsr << 3) & FLAGN) == 0;
+	case 13: return (cpsr & FLAGZ) != 0 || ((cpsr ^ cpsr << 3) & FLAGN) != 0;
+	case 14: return 1;
+	}
+	undefined(instr);
+	return 0;
+}
+
+/*
+ * setcpsr installs n as the new cpsr.  It selects the ARM or Thumb
+ * stepper from FLAGT and, when the mode field changes, parks r13, r14
+ * (and spsr for modes that have one) of the old mode in saver[] and
+ * loads those of the new mode.  r8-r12 are additionally swapped when
+ * entering or leaving MFIQ.  An unknown target mode is fatal.
+ */
+static void
+setcpsr(int n)
+{
+	static struct {
+		int mode;	/* cpsr mode value */
+		int sp, lr;	/* saver[] slots for r13 and r14 */
+		int psr;	/* saver[] slot for spsr, -1 if the mode has none */
+	} bank[] = {
+		{MUSR, R13USR, R14USR, -1},
+		{MSYS, R13USR, R14USR, -1},
+		{MFIQ, R13FIQ, R14FIQ, SPSRFIQ},
+		{MSVC, R13SVC, R14SVC, SPSRSVC},
+		{MABT, R13ABT, R14ABT, SPSRABT},
+		{MIRQ, R13IRQ, R14IRQ, SPSRIRQ},
+		{MUND, R13UND, R14UND, SPSRUND},
+	};
+	int i, from, to;
+
+	step = (n & FLAGT) != 0 ? stepthumb : steparm;
+	if((cpsr & MODE) == (n & MODE)){
+		cpsr = n;
+		return;
+	}
+	from = to = -1;
+	for(i = 0; i < nelem(bank); i++){
+		if(bank[i].mode == (cpsr & MODE))
+			from = i;
+		if(bank[i].mode == (n & MODE))
+			to = i;
+	}
+	/* an unknown current mode has no bank: nothing is saved for it */
+	if(from >= 0){
+		saver[bank[from].sp] = r[13];
+		saver[bank[from].lr] = r[14];
+		if(bank[from].psr >= 0)
+			saver[bank[from].psr] = spsr;
+	}
+	if(to < 0)
+		sysfatal("invalid mode switch to %#x (pc=%#.8x)", n, curpc);
+	r[13] = saver[bank[to].sp];
+	r[14] = saver[bank[to].lr];
+	if(bank[to].psr >= 0)
+		spsr = saver[bank[to].psr];
+	if((cpsr & MODE) == MFIQ){
+		memcpy(&saver[R8FIQ], &r[8], 5*sizeof(r[0]));
+		memcpy(&r[8], &saver[R8USR], 5*sizeof(r[0]));
+	}
+	if((n & MODE) == MFIQ){
+		memcpy(&saver[R8USR], &r[8], 5*sizeof(r[0]));
+		memcpy(&r[8], &saver[R8FIQ], 5*sizeof(r[0]));
+	}
+	cpsr = n;
+}
+
+/*
+ * interrupt enters exception mode src (MIRQ or MSVC): it switches to
+ * that mode in ARM state with FLAGI set, stores the previous cpsr in
+ * spsr, sets the return address in r14 and jumps to the vector.
+ */
+static void
+interrupt(int src)
+{
+	u32int old;
+	int thumb;
+
+	old = cpsr;
+	thumb = (old & FLAGT) != 0;
+	setcpsr(cpsr & ~(MODE|FLAGI|FLAGT) | FLAGI | src);
+	spsr = old;
+	if(src == MIRQ){
+		r[14] = thumb ? r[15] : r[15] - 4;
+		r[15] = 0x18;
+	}else if(src == MSVC){
+		r[14] = thumb ? r[15] - 2 : r[15] - 4;
+		r[15] = 0x08;
+	}else
+		sysfatal("unknown exception %x\n", src);
+	pipeflush();
+}
+
+/*
+ * mulspeed charges 1 to 4 cycles for a multiply, one for every byte of
+ * val needed to hold it; negative values are complemented first.
+ */
+static void
+mulspeed(u32int val)
+{
+	int n;
+
+	if((int)val < 0)
+		val = ~val;
+	n = 1;
+	if((val >> 8) != 0)
+		n++;
+	if((val >> 16) != 0)
+		n++;
+	if((val >> 24) != 0)
+		n++;
+	cyc += n;
+}
+
+/*
+ * armextra executes the ARM encodings that armalu hands over (bit 25
+ * clear, bits 7 and 4 set):
+ *   - bits 5-6 clear, bit 24 set: swap register with memory (byte or word)
+ *   - bits 5-6 clear, bit 24 clear: 32-bit and 64-bit multiplies
+ *   - otherwise: halfword and signed-byte loads and stores
+ */
+static void
+armextra(u32int instr)
+{
+	int Rn, Rd, Rm, Rs, sh;
+	u32int addr, off, val;
+	u64int vall;
+	enum {
+		SIGN = 1<<6,
+		HALF = 1<<5,
+		LOAD = 1<<20,
+		WRBACK = 1<<21,
+		IMM = 1<<22,
+		ADD = 1<<23,
+		PRE = 1<<24,
+		
+		BYTE = 1<<22,
+		
+		LONG = 1<<23,
+		MSIGN = 1<<22,
+		ACC = 1<<21,
+		FLAGS = 1<<20,
+	};
+	
+	Rm = instr & 0xf;
+	Rn = instr >> 16 & 0xf;
+	Rd = instr >> 12 & 0xf;
+	if((instr & 0x60) == 0){
+		if((instr & 1<<24) != 0){
+			addr = r[Rn];
+			if((instr & 0x0ffffff0) == 0x012fff10){
+				/* NOTE(review): this pattern has bit 7 clear, so it cannot occur when called from armalu */
+				r[14] = r[15] - 4;
+				r[15] = r[Rm];
+				setcpsr(cpsr | FLAGT);
+				pipeflush();
+			}else if((instr & BYTE) != 0){
+				io();
+				val = (u8int) memread(addr, 1, 0);
+				memwrite(addr, (u8int) r[Rm], 1);
+				r[Rd] = val;
+			}else{
+				io();
+				val = memread(addr & ~3, 4, 0);
+				if((addr & 3) != 0){
+					/* rotate right by 8 bits per byte of misalignment, as armsingle does for word loads */
+					sh = (addr & 3) << 3;
+					val = val >> sh | val << 32 - sh;
+				}
+				memwrite(addr, r[Rm], 4);
+				r[Rd] = val;
+			}
+		}else{
+			Rs = instr >> 8 & 0xf;
+			mulspeed(r[Rs]);
+			if((instr & LONG) != 0){
+				/* 64-bit result: high word in r[Rn], low word in r[Rd] */
+				if((instr & ACC) != 0){
+					vall = (u64int)r[Rn] << 32 | r[Rd];
+					io();
+				}else
+					vall = 0;
+				io();
+				if((instr & MSIGN) == 0)
+					vall += ((u64int) r[Rs]) * r[Rm];
+				else
+					vall += (s64int) ((s32int) r[Rs]) * (s32int) r[Rm];
+				r[Rn] = vall >> 32;
+				r[Rd] = vall;
+				if((instr & FLAGS) != 0){
+					cpsr &= ~(FLAGN|FLAGZ|FLAGC);
+					if(vall == 0)
+						cpsr |= FLAGZ;
+					if((s64int)vall < 0)
+						cpsr |= FLAGN;
+				}
+			}else{
+				/* 32-bit multiply: destination is r[Rn], accumulator is r[Rd] */
+				val = r[Rs] * r[Rm];
+				if((instr & ACC) != 0){
+					val += r[Rd];
+					io();
+				}
+				if((instr & FLAGS) != 0){
+					cpsr &= ~(FLAGN|FLAGZ|FLAGC);
+					if(val == 0)
+						cpsr |= FLAGZ;
+					if((int)val < 0)
+						cpsr |= FLAGN;
+				}
+				r[Rn] = val;
+			}
+		}
+		return;
+	}
+	/* halfword / signed-byte transfer: offset is a register or a split 8-bit immediate */
+	if((instr & IMM) == 0)
+		off = r[Rm];
+	else
+		off = instr & 0xf | instr >> 4 & 0xf0;
+	if((instr & ADD) == 0)
+		off = -off;
+	addr = r[Rn];
+	if((instr & PRE) != 0)
+		addr += off;
+	switch(instr & (HALF|LOAD)){
+	case 0:
+		memwrite(addr, (u8int) r[Rd], 1);
+		break;
+	case HALF:
+		memwrite(addr & ~1, (u16int) r[Rd], 2);
+		break;
+	case LOAD:
+		io();
+		r[Rd] = (u8int) memread(addr, 1, 0);
+		if((instr & SIGN) != 0)
+			r[Rd] = (s8int) r[Rd];
+		break;
+	case LOAD|HALF:
+		io();
+		val = (u16int) memread(addr & ~1, 2, 0);
+		if((instr & SIGN) != 0)
+			val = (s16int) val;
+		/* NOTE(review): rotates left by 8 on an odd address, while thldst rotates right -- confirm against hardware */
+		if((addr & 1) != 0)
+			val = val << 8 | val >> 24;
+		r[Rd] = val;
+		break;
+	}
+	if((instr & PRE) == 0)
+		addr += off;
+	/* write back unless pre-indexed without WRBACK; never clobber the transferred register */
+	if((instr & (WRBACK|PRE)) != PRE && Rn != Rd)
+		r[Rn] = addr;
+}
+
+static void	/* status-register moves and branch-exchange; reached from armalu for opcodes 8-11 without the S bit */
+armsr(u32int instr)
+{
+	int Rd, Rs;
+	u32int op, op0;
+	
+	if((instr & 0x0fbf0fff) == 0x010f0000){	/* copy cpsr, or spsr when bit 22 is set, into r[Rd] */
+		Rd = instr >> 12 & 0xf;
+		r[Rd] = (instr & 1<<22) != 0 ? spsr : cpsr;
+		return;
+	}
+	if((instr & 0x0fb0fff0) == 0x0120f000){	/* write a status register from r[Rs] */
+		Rs = instr & 0xf;
+		op = r[Rs];
+	msr:	/* shared tail: the immediate form below jumps here with op set */
+		op0 = 0;	/* byte mask built from instruction bits 16-19 */
+		if((instr & 1<<16) != 0) op0 |= 0xff;
+		if((instr & 1<<17) != 0) op0 |= 0xff00;
+		if((instr & 1<<18) != 0) op0 |= 0xff0000;
+		if((instr & 1<<19) != 0) op0 |= 0xff000000;
+		if((instr & 1<<22) != 0)
+			spsr = spsr & ~op0 | op & op0;
+		else
+			setcpsr(cpsr & ~op0 | op & op0);	/* goes through setcpsr so a mode change swaps register banks */
+		if((cpsr & FLAGT) != 0)
+			sysfatal("illegal MSR to CPSR (T bit set, val=%#.8ux, pc=%#.8ux)", cpsr, curpc);
+		return;
+	}
+	if((instr & 0x0fb0f000) == 0x0320f000){	/* write a status register from a rotated 8-bit immediate */
+		op = (u8int) instr;
+		Rs = instr >> 7 & 0x1e;	/* rotation amount: twice the 4-bit field in bits 8-11 */
+		op = op >> Rs | op << 32 - Rs;
+		goto msr;
+	}
+	if((instr & 0x0ffffff0) == 0x012fff10){	/* branch to r[Rs]; bit 0 of the target selects Thumb state */
+		Rs = instr & 0xf;
+		op = r[Rs];
+		if((op & 1) != 0)
+			setcpsr(cpsr | FLAGT);
+		r[15] = op & ~1;
+		pipeflush();
+		return;
+	}
+	undefined(instr);
+}
+
+static void	/* ARM data-processing instructions (top opcode nibble 0-3); forwards the multiply/swap/halfword encodings to armextra and the flag-less opcodes 8-11 to armsr */
+armalu(u32int instr)
+{
+	int Rn, Rd, Rs, Rm, oper, sbit;
+	u32int op, op0, res;
+	u64int res64;
+	int sh;
+	int cout;
+
+	if((instr & (1<<25|0x90)) == 0x90){
+		armextra(instr);
+		return;
+	}
+	Rn = instr >> 16 & 0xf;
+	Rd = instr >> 12 & 0xf;
+	Rs = instr >> 8 & 0xf;
+	if((instr & 1<<25) == 0){	/* second operand is r[Rm] passed through the shifter */
+		Rm = instr & 0xf;
+		op = r[Rm];
+		if((instr & 1<<4) == 0)
+			sh = instr >> 7 & 0x1f;	/* shift amount is an immediate */
+		else{
+			sh = (u8int) r[Rs];	/* shift amount is the low byte of r[Rs] */
+			if(Rm == 15)
+				op += 4; /* undocumented behaviour */
+		}
+		switch(instr >> 5 & 3){	/* shift type; cout receives the shifter carry in bit 0 */
+		default:	/* type 0: logical shift left */
+			if(sh == 0)
+				cout = cpsr >> 29;	/* no shift: carry is the current C flag */
+			else if(sh < 32){
+				cout = op >> 32 - sh;
+				op = op << sh;
+			}else if(sh == 32){
+				cout = op;
+				op = 0;
+			}else
+				cout = op = 0;
+			break;
+		case 1:	/* logical shift right; an immediate amount of 0 is treated as 32 */
+			if(sh == 0)
+				if((instr & 1<<4) != 0)
+					cout = cpsr >> 29;
+				else{
+					cout = op >> 31;
+					op = 0;
+				}
+			else if(sh < 32 && sh != 0){
+				cout = op >> sh - 1;
+				op = op >> sh;
+			}else if(sh == 32){
+				cout = op >> 31;
+				op = 0;
+			}else
+				cout = op = 0;
+			break;
+		case 2:	/* arithmetic shift right; immediate 0 and amounts of 32 or more fill with the sign bit */
+			if(sh == 0){
+				if((instr & 1<<4) != 0)
+					cout = cpsr >> 29;
+				else
+					cout = op = -((int)op < 0);
+			}else if(sh < 32){
+				cout = op >> sh - 1;
+				op = ((int) op) >> sh;
+			}else
+				cout = op = -((int)op < 0);
+			break;
+		case 3:	/* rotate right; an immediate amount of 0 rotates by one bit through the C flag */
+			if(sh == 0){
+				if((instr & 1<<4) != 0)
+					cout = cpsr >> 29;
+				else{
+					cout = op;
+					op = op >> 1 | (cpsr & FLAGC) << 2;	/* FLAGC is bit 29, so << 2 puts it in bit 31 */
+				}
+			}else{
+				sh &= 31;
+				if(sh == 0)
+					cout = op >> 31;
+				else{
+					cout = op >> sh - 1;
+					op = op << 32 - sh | op >> sh;
+				}
+			}
+			break;
+		}
+		cyc++;
+	}else{	/* second operand is an 8-bit immediate rotated right by twice the field in bits 8-11 */
+		op = (u8int) instr;
+		Rs <<= 1;
+		if(Rs != 0){
+			op = op << 32 - Rs | op >> Rs;
+			cout = op >> 31;
+		}else
+			cout = cpsr >> 29;
+	}
+	sbit = instr & 1<<20;	/* nonzero: update the flags */
+	op0 = r[Rn];
+	oper = instr >> 21 & 0xf;
+	SET(res64);
+	switch(oper){	/* opcodes 8-11 compute a result only for the flags; it is not stored below */
+	default: case 0: case 8: res = op0 & op; break;
+	case 1: case 9: res = op0 ^ op; break;
+	case 2: case 10: res64 = (uvlong) op0 + ~op + 1; res = res64; break;	/* op0 - op; bit 32 of res64 is the carry */
+	case 3: res64 = (uvlong) op + ~op0 + 1; res = res64; break;	/* op - op0 */
+	case 4: case 11: res64 = (uvlong) op0 + op; res = res64; break;
+	case 5: res64 = (uvlong) op0 + op + (cpsr >> 29 & 1); res = res64; break;	/* add with the C flag */
+	case 6: res64 = (uvlong) op0 + ~op + (cpsr >> 29 & 1); res = res64; break;	/* op0 - op with the C flag */
+	case 7: res64 = (uvlong) op + ~op0 + (cpsr >> 29 & 1); res = res64; break;	/* op - op0 with the C flag */
+	case 12: res = op0 | op; break;
+	case 13: res = op; break;
+	case 14: res = op0 & ~op; break;
+	case 15: res = ~op; break;
+	}
+	if(sbit){
+		switch(oper){
+		case 2: case 6: case 10:	/* op0 - op: overflow when the operands differ in sign and the result's sign differs from op0 */
+			cpsr &= ~(FLAGC|FLAGN|FLAGZ|FLAGV);
+			if(res64 >> 32 != 0)
+				cpsr |= FLAGC;
+			if(((op0 ^ op) & (op0 ^ res) & 1<<31) != 0)
+				cpsr |= FLAGV;
+			break;
+		case 3: case 7:	/* op - op0: same test with the operands exchanged */
+			cpsr &= ~(FLAGC|FLAGN|FLAGZ|FLAGV);
+			if(res64 >> 32 != 0)
+				cpsr |= FLAGC;
+			if(((op ^ op0) & (op ^ res) & 1<<31) != 0)
+				cpsr |= FLAGV;
+			break;
+		case 4: case 5: case 11:	/* additions: overflow when the operands agree in sign and the result does not */
+			cpsr &= ~(FLAGC|FLAGN|FLAGZ|FLAGV);
+			if(res64 >> 32 != 0)
+				cpsr |= FLAGC;
+			if((~(op ^ op0) & (op ^ res) & 1<<31) != 0)
+				cpsr |= FLAGV;
+			break;
+		default:	/* logical operations: C comes from the shifter, V is left alone */
+			cpsr &= ~(FLAGC|FLAGN|FLAGZ);
+			if(cout & 1)
+				cpsr |= FLAGC;
+			break;
+		}
+		if(res == 0)
+			cpsr |= FLAGZ;
+		if((res & 1<<31) != 0)
+			cpsr |= FLAGN;
+	}
+	if(oper < 8 || oper > 11){
+		r[Rd] = res;
+		if(Rd == 15){	/* writing r15 is a branch; with the S bit it also restores cpsr from spsr */
+			if(sbit)
+				setcpsr(spsr);
+			pipeflush();
+		}
+	}else if(!sbit){	/* opcodes 8-11 without the S bit are decoded by armsr */
+		if((instr & 1<<25) != 0)
+			cyc--;
+		armsr(instr);
+	}
+}
+
+static void	/* ARM single word/byte load and store (top opcode nibble 4-7) */
+armsingle(u32int instr)
+{
+	int op, Rn, Rd, Rm;
+	u32int off, addr, val, sh;
+	enum {	/* bits of op, which holds the instruction shifted right by 20 */
+		LOAD = 1<<0,
+		WRBACK = 1<<1,
+		BYTE = 1<<2,
+		ADD = 1<<3,
+		PRE = 1<<4,
+		REGOFF = 1<<5
+	};
+	
+	op = instr >> 20;
+	Rn = instr >> 16 & 0xf;
+	Rd = instr >> 12 & 0xf;
+	if((op & REGOFF) != 0){	/* offset is r[Rm] shifted by an immediate amount */
+		Rm = instr & 0xf;
+		off = r[Rm];
+		if((instr & 0xfff0) != 0){
+			sh = instr >> 7 & 0x1f;
+			switch(instr >> 5 & 3){	/* shift type */
+			case 0: off = off << sh; break;
+			case 1:	/* logical right; amount 0 yields 0 */
+				if(sh == 0)
+					off = 0;
+				else
+					off = off >> sh;
+				break;
+			case 2:	/* arithmetic right; amount 0 yields all sign bits */
+				if(sh == 0)
+					off = -((int)off < 0);
+				else
+					off = ((int)off) >> sh;
+				break;
+			case 3:	/* rotate right; amount 0 rotates one bit through the C flag */
+				if(sh == 0)
+					off = off >> 1 | (cpsr & FLAGC) << 2;
+				else	
+					off = off >> sh | off << 32 - sh;
+				break;
+			}
+		}
+	}else
+		off = instr & 0xfff;	/* 12-bit immediate offset */
+	if((op & ADD) == 0)
+		off = -off;
+	addr = r[Rn];
+	if((op & PRE) != 0)
+		addr += off;
+	io();
+	switch(op & (LOAD|BYTE)){
+	case 0:	/* store word at the aligned address */
+		memwrite(addr & ~3, r[Rd], 4);
+		break;
+	case BYTE:	/* store byte */
+		memwrite(addr, r[Rd], 1);
+		break;
+	case LOAD:	/* load word; a misaligned address rotates the aligned word right by 8 bits per byte */
+		val = memread(addr & ~3, 4, 0);
+		if((addr & 3) != 0){
+			sh = (addr & 3) << 3;
+			val = val >> sh | val << 32 - sh;
+		}
+		r[Rd] = val;
+		io();
+		if(Rd == 15)
+			pipeflush();
+		break;
+	case LOAD|BYTE:	/* load byte, zero-extended */
+		r[Rd] = (u8int) memread(addr, 1, 0);
+		io();
+		if(Rd == 15)
+			pipeflush();
+		break;
+	}
+	if((op & PRE) == 0)
+		addr += off;
+	if((op & (WRBACK|PRE)) != PRE && Rn != Rd)	/* write back unless pre-indexed without WRBACK; skipped when Rn is also the transferred register */
+		r[Rn] = addr;
+}
+
+static void	/* ARM block transfer of the registers listed in the low 16 instruction bits (top opcode nibble 8-9) */
+armmulti(u32int instr)
+{
+	int i, Rn, pop, user;
+	u32int addr, val, *rp;
+	u16int bits;
+	int seq;
+	enum {
+		LOAD = 1<<20,
+		WRBACK = 1<<21,
+		USER = 1<<22,
+		UP = 1<<23,
+		PRE = 1<<24,
+	};
+	
+	Rn = instr >> 16 & 0xf;
+	addr = r[Rn] & ~3;
+	if((instr & LOAD) != 0)
+		io();
+	for(bits = instr, pop = 0; bits != 0; pop++)	/* count the registers in the list */
+		bits &= bits - 1;
+	pop <<= 2;	/* total bytes transferred */
+	user = (instr & (USER|1<<15)) == USER;	/* USER without r15 in the list: transfer the user-mode registers */
+	switch(instr & (PRE|UP)){	/* addr: lowest address accessed; val: value written back to r[Rn] */
+	default:
+		val = addr - pop;
+		addr = val + 4;
+		break;
+	case PRE:
+		addr = val = addr - pop;
+		break;
+	case UP:
+		val = addr + pop;
+		break;
+	case UP|PRE:
+		val = addr + pop;
+		addr += 4;
+		break;
+	}
+	seq = 0;
+	for(i = 0; i < 16; i++){	/* registers are always transferred in ascending order */
+		if((instr & 1<<i) == 0)
+			continue;
+		if(user)
+			switch(i){
+			case 8: case 9: case 10: case 11: case 12:
+				if((cpsr & MODE) == MFIQ){	/* in MFIQ the user r8-r12 live in saver[] */
+					rp = &saver[R8USR + i - 8];
+					break;
+				}	/* otherwise falls through to the live register */
+			default: rp = &r[i]; break;
+			case 13: rp = &saver[R13USR]; break;
+			case 14: rp = &saver[R14USR]; break;
+			}
+		else
+			rp = &r[i];
+		if((instr & LOAD) != 0)
+			*rp = memread(addr, 4, seq);
+		else
+			memwrite(addr, *rp, 4);
+		addr += 4;
+		seq = 1;	/* passed as the last memread argument for every access after the first */
+	}
+	/* undocumented: if Rn is the first register set in a load, it's overwritten if writeback is specified */
+	if((instr & WRBACK) != 0 && ((instr & LOAD) == 0 || (instr & instr-1 & 1<<Rn) == 0))
+		r[Rn] = val;
+	if((instr & (LOAD|1<<15)) == (LOAD|1<<15)){	/* r15 was loaded: this is a branch, with USER it also restores cpsr from spsr */
+		if((instr & USER) != 0)
+			setcpsr(spsr);
+		pipeflush();
+	}
+}
+
+/*
+ * armbranch adds the sign-extended 24-bit word offset of instr to r15.
+ * When bit 24 is set the return address is stored in r14 first.
+ */
+static void
+armbranch(u32int instr)
+{
+	int off;
+
+	/* shift the 24-bit field to the top, then back down to sign-extend and multiply by 4 */
+	off = (int)(instr << 8) >> 6;
+	if((instr & 1<<24) != 0)
+		r[14] = r[15] - 4;
+	r[15] += off;
+	pipeflush();
+}
+
+static int	/* one ARM-state step: execute the instruction in instr1 (or take a pending irq), then fetch the next word; returns the cycles used */
+steparm(void)
+{
+	int s;
+	u32int instr;
+
+	cyc = 0;
+	if((pipel & 2) != 0)	/* pipeline still refilling after a flush: only fetch */
+		goto fetch;
+	if(irq && (cpsr & FLAGI) == 0){
+		interrupt(MIRQ);
+		return 1;
+	}
+	curpc = r[15] - 8;	/* r15 runs two words ahead of the executing instruction */
+	instr = instr1;
+	if(trace)
+		print("A %.8ux %.8ux %.8ux %.8ux %.8ux | %.8ux %.8ux %.8ux %.8ux | %.8ux %.8ux %.8ux %.8ux\n", curpc, instr, cpsr, r[13], r[14], r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7]);
+	if(instr >> 28 != 0xe && !cond(instr >> 28, instr))	/* condition failed: skip the instruction */
+		goto fetch;
+	switch(instr >> 24 & 0xf){
+	case 0: case 1: case 2: case 3:
+		armalu(instr);
+		break;
+	case 4: case 5: case 6: case 7:
+		armsingle(instr);
+		break;
+	case 8: case 9:
+		armmulti(instr);
+		break;
+	case 10: case 11:
+		armbranch(instr);
+		break;
+	case 15:
+		interrupt(MSVC);
+		break;
+	default:
+		undefined(instr);
+	}
+fetch:
+	instr1 = instr0;
+	s = step == steparm ? 4 : 2;	/* the instruction may have switched to Thumb state: fetch at the new width */
+	instr0 = memread(r[15], s, pipel != -1);	/* last argument is 0 only for the first fetch after a flush */
+	r[15] += s;
+	pipel <<= 1;
+	return cyc;
+}
+
+/*
+ * addflags sets N, Z, C and V in cpsr for the sum a + b + c.
+ * A subtraction a - b is expressed by the callers as a + ~b + 1.
+ */
+static void
+addflags(u32int a, u32int b, u32int c)
+{
+	u64int sum;
+	u32int res, f;
+
+	sum = (u64int) a + b + c;
+	res = sum;
+	f = res & FLAGN;
+	if(res == 0)
+		f |= FLAGZ;
+	if(sum >> 32 != 0)
+		f |= FLAGC;
+	/* overflow: a and b agree in sign and the result does not */
+	if((~(a ^ b) & (a ^ res) & 1<<31) != 0)
+		f |= FLAGV;
+	cpsr = cpsr & ~(FLAGN|FLAGZ|FLAGC|FLAGV) | f;
+}
+
+/* nz sets the N and Z flags of cpsr from v and leaves the other bits alone. */
+static void
+nz(u32int v)
+{
+	u32int f;
+
+	f = v & FLAGN;
+	if(v == 0)
+		f |= FLAGZ;
+	cpsr = cpsr & ~(FLAGN|FLAGZ) | f;
+}
+
+/*
+ * thshadd executes the Thumb instructions with top opcode nibble 0-1:
+ * shifts of r[Rs] by a 5-bit immediate (op 0-2) and three-operand
+ * add/subtract (op 3) with a register or 3-bit immediate operand.
+ */
+static void
+thshadd(u16int instr)
+{
+	int Rd, Rs, off, op;
+	u32int val, a, b, cout;
+	
+	Rd = instr & 7;
+	Rs = instr >> 3 & 7;
+	off = instr >> 6 & 0x1f;
+	op = instr >> 11 & 3;
+	a = r[Rs];
+	switch(op){
+	case 0:	/* logical shift left; amount 0 keeps value and carry */
+		if(off == 0){
+			r[Rd] = val = a;
+			cout = cpsr >> 29;
+		}else{
+			r[Rd] = val = a << off;
+			cout = a >> 32 - off;
+		}
+		goto logflags;
+	case 1:	/* logical shift right; amount 0 is treated as 32 */
+		if(off == 0){
+			r[Rd] = val = 0;
+			cout = a >> 31;
+		}else{
+			r[Rd] = val = a >> off;
+			cout = a >> off - 1;
+		}
+		goto logflags;
+	case 2:	/* arithmetic shift right; amount 0 fills with the sign bit */
+		if(off == 0)
+			cout = r[Rd] = val = -((int)a < 0);
+		else{
+			r[Rd] = val = (int)a >> off;
+			cout = a >> off - 1;
+		}
+		goto logflags;
+	case 3:
+		break;
+	}
+	/* add/subtract: the low 3 bits of off name a register or are the immediate */
+	if((instr & 1<<10) == 0)
+		b = r[off & 7];
+	else
+		b = off & 7;
+	if((instr & 1<<9) != 0){
+		/*
+		 * Subtract as a + ~b + 1, like the other subtractions in this
+		 * file; adding -b gives the wrong carry for b == 0 and the
+		 * wrong overflow for b == 0x80000000.
+		 */
+		r[Rd] = a - b;
+		addflags(a, ~b, 1);
+	}else{
+		r[Rd] = a + b;
+		addflags(a, b, 0);
+	}
+	return;
+logflags:
+	cpsr &= ~(FLAGN|FLAGZ|FLAGC);
+	if(val == 0)
+		cpsr |= FLAGZ;
+	if((int)val < 0)
+		cpsr |= FLAGN;
+	if((cout & 1) != 0)
+		cpsr |= FLAGC;
+}
+
+/*
+ * thaddimm executes the Thumb operations on r[Rd] and an 8-bit
+ * immediate: 0 move, 1 compare, 2 add, 3 subtract.
+ */
+static void
+thaddimm(u16int instr)
+{
+	int Rd, imm, op, old;
+
+	imm = instr & 0xff;
+	Rd = instr >> 8 & 7;
+	op = instr >> 11 & 3;
+	old = r[Rd];
+	if(op == 0){
+		r[Rd] = imm;
+		nz(imm);
+		return;
+	}
+	if(op == 2){
+		r[Rd] = old + imm;
+		addflags(old, imm, 0);
+		return;
+	}
+	/* compare and subtract set the same flags; only subtract stores the result */
+	if(op == 3)
+		r[Rd] = old - imm;
+	addflags(old, ~imm, 1);
+}
+
+static void	/* Thumb instructions with top opcode nibble 4: two-register ALU operations, operations reaching r8-r15, and word loads relative to r15 */
+thalu(u16int instr)
+{
+	int Rs, Rd;
+	u32int a, b, v, c;
+
+	switch(instr >> 10 & 3){
+	case 0:	/* ALU operation selected by bits 6-9 on r[Rd] (a) and r[Rs] (b) */
+		Rd = instr & 7;
+		Rs = instr >> 3 & 7;
+		a = r[Rd];
+		b = r[Rs];
+		switch(instr >> 6 & 0xf){
+		case 0:	/* and */
+			r[Rd] = v = a & b;
+			nz(v);
+			break;
+		case 1:	/* exclusive or */
+			r[Rd] = v = a ^ b;
+			nz(v);
+			break;
+		case 2:	/* shift left by register; C is only touched when the amount is nonzero */
+			io();
+			v = a;
+			if(b != 0){
+				if(b < 32){
+					c = v >> 32 - b;
+					v <<= b;
+				}else if(b == 32){
+					c = v;
+					v = 0;
+				}else
+					c = v = 0;
+				cpsr = cpsr & ~FLAGC | c << 29 & FLAGC;
+			}
+			r[Rd] = v;
+			nz(v);
+			break;
+		case 3:	/* logical shift right by register */
+			io();
+			v = a;
+			if(b != 0){
+				if(b < 32){
+					c = v >> b - 1;
+					v >>= b;
+				}else if(b == 32){
+					c = v >> 31;
+					v = 0;
+				}else
+					c = v = 0;
+				cpsr = cpsr & ~FLAGC | c << 29 & FLAGC;
+			}
+			r[Rd] = v;
+			nz(v);
+			break;
+		case 4:	/* NOTE(review): v is unsigned here, so the b < 32 shift does not replicate the sign bit, unlike the b >= 32 branch -- confirm intent */
+			io();
+			v = a;
+			if(b != 0){
+				if(b < 32){
+					c = v >> b - 1;
+					v >>= b;
+				}else
+					c = v = -((int)v < 0);
+				cpsr = cpsr & ~FLAGC | c << 29 & FLAGC;
+			}
+			r[Rd] = v;
+			nz(v);
+			break;
+		case 5:	/* add with the C flag */
+			c = cpsr >> 29 & 1;
+			r[Rd] = a + b + c;
+			addflags(a, b, c);
+			break;
+		case 6:	/* subtract with the C flag, as a + ~b + C */
+			c = cpsr >> 29 & 1;
+			r[Rd] = a + ~b + c;
+			addflags(a, ~b, c);
+			break;
+		case 7:	/* rotate right by register */
+			io();
+			b &= 31;
+			r[Rd] = v = a >> b | a << 32 - b;
+			if(r[Rs] != 0){
+				c = a >> (b - 1 & 31);
+				cpsr = cpsr & ~FLAGC | c << 29 & FLAGC;
+			}
+			nz(v);
+			break;
+		case 8:	/* flags of a & b, result discarded */
+			nz(a & b);
+			break;
+		case 9:	/* negate b */
+			r[Rd] = -b;
+			addflags(0, ~b, 1);
+			break;
+		case 10:	/* flags of a - b, result discarded */
+			addflags(a, ~b, 1);
+			break;
+		case 11:	/* flags of a + b, result discarded */
+			addflags(a, b, 0);
+			break;
+		case 12:	/* or */
+			r[Rd] = v = a | b;
+			nz(v);
+			break;
+		case 13:	/* multiply; cycle cost depends on a */
+			r[Rd] = v = a * b;
+			mulspeed(a);
+			nz(v);
+			cpsr &= ~FLAGC;
+			break;
+		case 14:	/* and with complement */
+			r[Rd] = v = a & ~b;
+			nz(v);
+			break;
+		case 15:	/* complement of b */
+			r[Rd] = ~b;
+			nz(~b);
+			break;
+		}
+		break;
+	case 1:	/* operations whose register numbers are 4 bits wide and can name r8-r15 */
+		Rd = instr & 7 | instr >> 4 & 8;
+		Rs = instr >> 3 & 15;
+		switch(instr >> 8 & 3){
+		case 0:	/* add, flags untouched; writing r15 branches */
+			r[Rd] += r[Rs];
+			if(Rd == 15){
+				r[15] &= ~1;
+				pipeflush();
+			}
+			break;
+		case 1:	/* compare */
+			addflags(r[Rd], ~r[Rs], 1);
+			break;
+		case 2:	/* move, flags untouched; writing r15 branches */
+			r[Rd] = r[Rs];
+			if(Rd == 15){
+				r[15] &= ~1;
+				pipeflush();
+			}
+			break;
+		case 3:	/* branch to r[Rs]; a clear bit 0 switches to ARM state */
+			if((r[Rs] & 1) == 0)
+				setcpsr(cpsr & ~FLAGT);
+			r[15] = r[Rs] & ~1;
+			pipeflush();
+			break;
+		}
+		break;
+	case 2: case 3:	/* load a word from (r15 & ~3) plus the 8-bit immediate times 4 */
+		Rd = instr >> 8 & 7;
+		a = (r[15] & ~3) + (((u8int) instr) << 2);
+		io();
+		r[Rd] = memread(a & ~3, 4, 0);
+		break;
+	}
+}
+
+static void	/* Thumb single load/store (top opcode nibble 5-9): decode offset, size, direction and sign extension, then perform the access */
+thldst(u16int instr)
+{
+	int Rd, Rb, Ro, size, sx, load, sh;
+	u32int v, off;
+	
+	Rd = instr & 7;
+	Rb = instr >> 3 & 7;
+	sx = 0;
+	switch(instr >> 13){
+	case 2:	/* offset taken from register r[Ro] */
+		Ro = instr >> 6 & 7;
+		off = r[Ro];
+		if((instr & 1<<9) != 0){	/* halfword and sign-extending forms: both bits 10-11 clear means store halfword */
+			load = instr & 3<<10;
+			sx = instr & 1<<10;
+			size = load == 1<<10 ? 1 : 2;
+		}else{	/* plain word or byte access */
+			load = instr & 1<<11;
+			size = (instr & 1<<10) != 0 ? 1 : 4;
+		}
+		break;
+	default:
+	case 3:	/* 5-bit immediate offset, scaled by 4 for words */
+		if((instr & 1<<12) != 0){
+			off = instr >> 6 & 0x1f;
+			size = 1;
+		}else{
+			off = instr >> 4 & 0x7c;
+			size = 4;
+		}
+		load = instr & 1<<11;
+		break;
+	case 4:
+		if((instr & 1<<12) == 0){	/* halfword with 5-bit immediate offset scaled by 2 */
+			off = instr >> 5 & 0x3e;
+			size = 2;
+			load = instr & 1<<11;
+		}else{	/* word relative to r13 with 8-bit immediate offset scaled by 4 */
+			Rb = 13;
+			Rd = instr >> 8 & 7;
+			off = instr << 2 & 0x3fc;
+			load = instr & 1<<11;
+			size = 4;
+		}
+		break;
+	}
+	off += r[Rb];	/* off now holds the effective address */
+	if(load){
+		io();
+		v = memread(off & ~(size - 1), size, 0);
+		if(sx)
+			if(size == 2)
+				v = ((int)(v << 16)) >> 16;
+			else
+				v = ((int)(v << 24)) >> 24;
+		if((off & size - 1) != 0){	/* misaligned load: rotate right by 8 bits per byte of misalignment */
+			sh = (off & size - 1) << 3;
+			v = v >> sh | v << 32 - sh;
+		}
+		r[Rd] = v;
+	}else
+		memwrite(off, r[Rd], size);
+}
+
+/*
+ * thldaddr stores in r[Rd] the sum of an 8-bit immediate scaled by 4
+ * and either r13 (bit 11 set) or the word-aligned r15.
+ */
+static void
+thldaddr(u16int instr)
+{
+	u32int base;
+
+	base = (instr & 1<<11) != 0 ? r[13] : r[15] & ~3;
+	r[instr >> 8 & 7] = base + (instr << 2 & 0x3fc);
+}
+
+static void	/* Thumb instructions with top opcode nibble 11-12: adjust r13 by an immediate, or transfer the registers listed in the low 8 bits */
+thmulti(u16int instr)
+{
+	int off, lr, Rb;
+	int i, seq;
+	u32int addr;
+	
+	if((instr >> 8) == 0xb0){	/* add a 7-bit immediate times 4 to r13, negated when bit 7 is set */
+		off = instr << 2 & 0x1fc;
+		if((instr & 1<<7) != 0)
+			off = -off;
+		r[13] += off;
+		return;
+	}
+	if(instr >> 14 != 3){	/* r13-based form; bit 8 adds r14 (store) or r15 (load) to the list */
+		Rb = 13;
+		lr = instr & 1<<8;
+	}else{	/* base register given in bits 8-10 */
+		Rb = instr >> 8 & 7;
+		lr = 0;
+	}
+	addr = r[Rb];
+	seq = 0;
+	if((instr & 1<<11) != 0){	/* load: ascending addresses starting at the base */
+		io();
+		for(i = 0; i < 8; i++){
+			if((instr & 1<<i) == 0)
+				continue;
+			r[i] = memread(addr & ~3, 4, seq);
+			addr += 4;
+			seq = 1;
+		}
+		if(lr){	/* loading r15 is a branch */
+			r[15] = memread(addr & ~3, 4, seq) & ~1;
+			pipeflush();
+			addr += 4;
+		}
+	}else if(Rb == 13){	/* store below r13: walk downwards so the lowest register ends at the lowest address */
+		if(lr){
+			addr -= 4;
+			memwrite(addr & ~3, r[14], 4);
+		}
+		for(i = 7; i >= 0; i--){
+			if((instr & 1<<i) == 0)
+				continue;
+			addr -= 4;
+			memwrite(addr & ~3, r[i], 4);
+		}
+	}else	/* store upwards from the base register */
+		for(i = 0; i < 8; i++){
+			if((instr & 1<<i) == 0)
+				continue;
+			memwrite(addr & ~3, r[i], 4);
+			addr += 4;
+		}
+	if(Rb == 13 || (instr & 1<<Rb) == 0)	/* write the final address back unless the base register is in the list */
+		r[Rb] = addr;
+}
+
+/*
+ * thcondbranch executes a Thumb conditional branch with a signed 8-bit
+ * halfword offset.  Condition code 0xf raises the MSVC exception instead.
+ */
+static void
+thcondbranch(u16int instr)
+{
+	int cc;
+
+	cc = instr >> 8 & 15;
+	if(cc == 0xf){
+		interrupt(MSVC);
+		return;
+	}
+	if(cond(cc, instr)){
+		/* sign-extend the low byte and multiply it by 2 */
+		r[15] += ((int)(instr << 24)) >> 23;
+		pipeflush();
+	}
+}
+
+/* thbranch adds the signed 11-bit halfword offset of instr to r15. */
+static void
+thbranch(u16int instr)
+{
+	int off;
+
+	/* sign-extend the low 11 bits and multiply them by 2 */
+	off = ((int)(instr << 21)) >> 20;
+	r[15] += off;
+	pipeflush();
+}
+
+/*
+ * thlbranch executes one half of the two-instruction Thumb long branch.
+ * The first half (bit 11 clear) stores r15 plus the signed upper offset
+ * in r14.  The second half adds the lower offset to r14, jumps there and
+ * leaves curpc + 3 in r14 as the return address.
+ */
+static void
+thlbranch(u16int instr)
+{
+	u32int target;
+
+	if((instr & 1<<11) == 0){
+		r[14] = r[15] + ((int)(instr << 21)>>9);
+		return;
+	}
+	target = r[14] + (instr << 1 & 0xffe);
+	r[14] = curpc + 3;
+	r[15] = target;
+	pipeflush();
+}
+
+static int	/* one Thumb-state step: execute the halfword in instr1 (or take a pending irq), then fetch the next one; returns the cycles used */
+stepthumb(void)
+{
+	u16int instr;
+	int s;
+
+	cyc = 0;
+	if((pipel & 2) != 0)	/* pipeline still refilling after a flush: only fetch */
+		goto fetch;
+	if(irq && (cpsr & FLAGI) == 0){
+		interrupt(MIRQ);
+		return 1;
+	}
+	curpc = r[15] - 4;	/* r15 runs two halfwords ahead of the executing instruction */
+	instr = instr1;
+	if(trace)
+		print("T %.8ux %.4ux %.8ux %.8ux %.8ux | %.8ux %.8ux %.8ux %.8ux | %.8ux %.8ux %.8ux %.8ux\n", curpc, instr, cpsr, r[13], r[14], r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7]);
+	switch(instr >> 12 & 0xf){	/* dispatch on the top nibble */
+	case 0: case 1:
+		thshadd(instr);
+		break;
+	case 2: case 3:
+		thaddimm(instr);
+		break;
+	case 4:
+		thalu(instr);
+		break;
+	case 5: case 6: case 7: case 8: case 9:
+		thldst(instr);
+		break;
+	case 10:
+		thldaddr(instr);
+		break;
+	case 11: case 12:
+		thmulti(instr);
+		break;
+	case 13:
+		thcondbranch(instr);
+		break;
+	case 14:
+		thbranch(instr);
+		break;
+	case 15:
+		thlbranch(instr);
+		break;
+	default:
+		undefined(instr);
+	}
+fetch:
+	instr1 = instr0;
+	s = step == steparm ? 4 : 2;	/* the instruction may have switched to ARM state: fetch at the new width */
+	instr0 = memread(r[15], s, pipel != -1);	/* last argument is 0 only for the first fetch after a flush */
+	r[15] += s;
+	pipel <<= 1;
+	return cyc;
+}
+
+/*
+ * reset restarts the CPU: execution begins at address 0 with an empty
+ * pipeline and cpsr 0xd3 (mode MSVC, FLAGI set, ARM state).
+ */
+void
+reset(void)
+{
+	pipel = -1;
+	r[15] = 0;
+	setcpsr(0xd3);
+}
--- /dev/null
+++ b/sys/src/games/gba/dat.h
@@ -1,0 +1,114 @@
+/* signed fixed-width integer types used by the emulator */
+typedef signed char s8int;	/* explicitly signed: casts to s8int are used to sign-extend bytes */
+typedef short s16int;
+typedef long s32int;
+typedef vlong s64int;
+
+extern int cpuhalt, trace, keys;	/* cpuhalt, keys: gba.c; trace: cpu.c, toggled by keyproc */
+
+extern u32int curpc;	/* cpu.c: address of the executing instruction */
+extern int irq;	/* cpu.c: nonzero while an interrupt is pending */
+
+extern int dmaact;	/* mem.c: nonzero makes the main loop call dmastep instead of step */
+extern uchar vram[];	/* mem.c */
+extern u16int pram[], oam[];	/* mem.c */
+extern u16int reg[];	/* mem.c: register file indexed by the constants below */
+extern uchar *rom, *back;	/* cartridge image and save data buffers */
+extern int nrom, nback, backup;	/* sizes of rom and back in bytes; backup is one of NOBACK, SRAM, EEPROM, FLASH */
+
+extern int ppux, ppuy;	/* compared with 240 and 160 by regread; presumably the current pixel position -- confirm in the ppu source */
+extern u8int bldy, blda, bldb;
+
+extern int scale;	/* gba.c: display magnification set by the -2/-3 options */
+
+enum {	/* indices into reg[]: byte offset of the register divided by 2, since reg[] holds 16-bit entries */
+	DISPCNT = 0x0/2,
+	DISPSTAT = 0x4/2,
+	BG0CNT = 0x8/2,
+	BG0HOFS = 0x10/2,
+	BG0VOFS = 0x12/2,
+
+	BG2PA = 0x20/2,
+	BG2PB = 0x22/2,
+	BG2PC = 0x24/2,
+	BG2PD = 0x26/2,
+	BG2XL = 0x28/2,
+	BG2XH = 0x2a/2,
+	BG2YL = 0x2c/2,
+	BG2YH = 0x2e/2,
+	
+	WIN0H = 0x40/2,
+	WIN1H = 0x42/2,
+	WIN0V = 0x44/2,
+	WIN1V = 0x46/2,
+	WININ = 0x48/2,
+	WINOUT = 0x4a/2,
+	BLDCNT = 0x50/2,
+	BLDALPHA = 0x52/2,
+	BLDY = 0x54/2,
+	
+	DMA0CNTH = 0xba/2,
+	DMA1CNTH = 0xc6/2,
+	DMA2CNTH = 0xd2/2,
+	DMA3CNTH = 0xde/2,
+	
+	KEYCNT = 0x132/2,	/* tested by flush() to raise IRQKEY */
+
+	IE = 0x200/2,
+	IF = 0x202/2,
+	WAITCNT = 0x204/2,
+	IME = 0x208/2,
+	
+};
+
+enum {	/* bit masks, field positions and field values, grouped by the register they belong to */
+	/* DISPCNT */
+	FRAME = 1<<4,
+	HBLFREE = 1<<5,
+	OBJNOMAT = 1<<6,
+	FBLANK = 1<<7,
+
+	/* DISPSTAT */
+	IRQVBLEN = 1<<3,
+	IRQHBLEN = 1<<4,
+	IRQVCTREN = 1<<5,
+
+	/* BGnCNT */
+	BG8 = 1<<7,
+	DISPWRAP = 1<<13,
+	
+	/* DMAnCNTH */
+	DMADCNT = 5,	/* not a power of two like its neighbours: presumably a bit position -- confirm in the dma code */
+	DMASCNT = 7,
+	DMAREP = 1<<9,
+	DMAWIDE = 1<<10,
+	DMAWHEN = 12,
+	DMAIRQ = 1<<14,
+	DMAEN = 1<<15,
+
+	DMAINC = 0,
+	DMADEC = 1,
+	DMAFIX = 2,
+	DMAINCREL = 3,
+
+	DMANOW = 0,
+	DMAVBL = 1,
+	DMAHBL = 2,
+	DMASPEC = 3,
+	DMASOUND = 4,
+	DMAVIDEO = 5,
+	
+	IRQVBL = 1<<0,	/* interrupt source bits; IRQKEY is passed to setif() by flush() */
+	IRQHBL = 1<<1,
+	IRQVCTR = 1<<2,
+	IRQTIM0 = 1<<3,
+	IRQDMA0 = 1<<8,
+	IRQKEY = 1<<12,
+	
+	NOBACK = 0,	/* save memory types, stored in backup */
+	SRAM = 1,
+	EEPROM = 2,
+	FLASH = 3,
+	
+	KB = 1024,
+	BACKTYPELEN = 64,	/* length of the type header at the start of a save file; the data follows at this offset */
+};
--- /dev/null
+++ b/sys/src/games/gba/fns.h
@@ -1,0 +1,13 @@
+u32int memread(u32int, int, int);	/* (address, size in bytes, flag); callers pass nonzero as the flag for accesses that follow a previous one */
+void memwrite(u32int, u32int, int);	/* (address, value, size in bytes) */
+extern int (*step)(void);	/* cpu.c: executes one instruction and returns the cycles used */
+void reset(void);	/* cpu.c: restart the CPU at address 0 */
+void memreset(void);
+void setif(u16int);	/* called with an IRQ* bit to signal an interrupt source */
+void ppustep(void);	/* called by the main loop once per 4 cycles */
+void timerstep(int t);	/* called by the main loop with the cycles of the last step */
+void flush(void);	/* gba.c: present the finished frame */
+int dmastep(void);	/* called instead of step() while dmaact is set; its result is used as a cycle count */
+void dmastart(int);
+void flushback(void);	/* gba.c: write the save data to the save file now */
+void writeback(void);	/* gba.c: schedule a save file write a few frames from now */
--- /dev/null
+++ b/sys/src/games/gba/gba.c
@@ -1,0 +1,439 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <draw.h>
+#include <mouse.h>
+#include <keyboard.h>
+#include "dat.h"
+#include "fns.h"
+
+int cpuhalt;	/* nonzero: the main loop skips step() and charges 8 cycles per iteration */
+int scale, profile;	/* scale: display magnification (1-3); profile: -T given, draw the speed figure */
+Rectangle picr;	/* where on the screen the picture is drawn */
+Image *bg, *tmp;	/* bg: background colour; tmp: staging image for the frame pixels */
+Mousectl *mc;
+int keys, paused, framestep, backup;	/* keys: pressed-key bit mask built by keyproc; backup: save memory type */
+QLock pauselock;	/* held by keyproc while paused; the main loop blocks on it */
+int savefd, saveframes;	/* save file descriptor; frames left until a scheduled flushback */
+
+char *biosfile = "/sys/games/lib/gbabios.bin";	/* default BIOS image, overridden by -b */
+
+int ppuclock;	/* cycles accumulated towards the next ppustep */
+
+/*
+ * emalloc is malloc that never returns nil: allocation failure is
+ * fatal.  The block is tagged with the caller's pc.
+ */
+void *
+emalloc(ulong sz)
+{
+	void *p;
+
+	p = malloc(sz);
+	if(p == nil)
+		sysfatal("malloc: %r");
+	setmalloctag(p, getcallerpc(&sz));
+	return p;
+}
+
+/*
+ * writeback schedules a save file write 15 frames from now, unless
+ * one is already pending.
+ */
+void
+writeback(void)
+{
+	if(saveframes != 0)
+		return;
+	saveframes = 15;
+}
+
+/*
+ * flushback cancels any pending scheduled write and, if a save file is
+ * open, writes the save data after the type header.
+ */
+void
+flushback(void)
+{
+	saveframes = 0;
+	if(savefd < 0)
+		return;
+	pwrite(savefd, back, nback, BACKTYPELEN);
+}
+
+/*
+ * loadbios reads the BIOS image named by biosfile into bios[].
+ * Failing to open or read the file is fatal.  A file shorter than
+ * bios[] is accepted and leaves the rest of the array untouched.
+ */
+void
+loadbios(void)
+{
+	extern uchar bios[16384];
+	int fd;
+
+	fd = open(biosfile, OREAD);
+	if(fd < 0)
+		sysfatal("open: %r");
+	if(readn(fd, bios, sizeof(bios)) < 0)
+		sysfatal("read: %r");
+	close(fd);
+}
+
+/*
+ * romtype guesses the save memory type by scanning the ROM image, at
+ * every 4-byte aligned offset, for the identification strings
+ * "EEPROM_V", "SRAM_V", "FLASH_V", "FLASH512_V" and "FLASH1M_V".
+ * It returns SRAM or FLASH and stores the save size in *size, or
+ * NOBACK when nothing usable is found.  EEPROM is reported but cannot
+ * be sized, so it also yields NOBACK.
+ *
+ * Every comparison is bounded by the bytes left in the image, so the
+ * scan never reads past rom + nrom, even for images shorter than 4 bytes.
+ */
+int
+romtype(int *size)
+{
+	uchar *p, *e;
+	long left;
+
+	e = rom + nrom;
+	for(p = rom; e - p >= 4; p += 4){
+		left = e - p;
+		if(left >= 8 && memcmp(p, "EEPROM_V", 8) == 0){
+			print("backup type is either eeprom4 or eeprom64 -- can't detect which one\n");
+			return NOBACK;
+		}
+		if(left >= 6 && memcmp(p, "SRAM_V", 6) == 0){
+			*size = 32*KB;
+			return SRAM;
+		}
+		if(left >= 7 && memcmp(p, "FLASH_V", 7) == 0
+		|| left >= 10 && memcmp(p, "FLASH512_V", 10) == 0){
+			*size = 64*KB;
+			return FLASH;
+		}
+		if(left >= 9 && memcmp(p, "FLASH1M_V", 9) == 0){
+			*size = 128*KB;
+			return FLASH;
+		}
+	}
+	return NOBACK;
+}
+
+/*
+ * parsetype maps a save type name (as given to -s or stored in a save
+ * file header) to its type constant and stores the size in bytes in
+ * *size.  Unknown names return NOBACK and leave *size untouched.
+ */
+int
+parsetype(char *s, int *size)
+{
+	static struct {
+		char *name;
+		int type;
+		int size;
+	} tab[] = {
+		{"eeprom4", EEPROM, 512},
+		{"eeprom64", EEPROM, 8*KB},
+		{"sram256", SRAM, 32*KB},
+		{"flash512", FLASH, 64*KB},
+		{"flash1024", FLASH, 128*KB},
+	};
+	int i;
+
+	for(i = 0; i < nelem(tab); i++)
+		if(strcmp(s, tab[i].name) == 0){
+			*size = tab[i].size;
+			return tab[i].type;
+		}
+	return NOBACK;
+}
+
+/*
+ * typename formats the name of a save type into s, which must hold
+ * BACKTYPELEN bytes: the type word followed by size/128.  This is the
+ * inverse of parsetype.  An unknown type is fatal.
+ */
+void
+typename(char *s, int type, int size)
+{
+	char *st;
+
+	st = nil;
+	if(type == EEPROM)
+		st = "eeprom";
+	else if(type == FLASH)
+		st = "flash";
+	else if(type == SRAM)
+		st = "sram";
+	if(st == nil){
+		sysfatal("typestr: unknown type %d -- shouldn't happen", type);
+		return;
+	}
+	snprint(s, BACKTYPELEN, "%s%d", st, size/128);
+}
+
+/*
+ * loadsave opens, or creates, the save file that belongs to the ROM
+ * file: the ROM's path with its extension replaced by ".sav".
+ *
+ * A save file is a BACKTYPELEN byte header holding the type name (see
+ * typename) followed by the save data.  When the file exists its header
+ * decides backup and nback and must agree with a type given on the
+ * command line.  Otherwise the type is taken from the command line or
+ * detected with romtype, and a zero-filled file is created.  Without a
+ * usable type no save file is used.
+ */
+void
+loadsave(char *file)
+{
+	char *buf, *p;
+	char tstr[BACKTYPELEN];
+	int type, size;
+	
+	/* room for the whole name plus ".sav" and the terminating NUL */
+	buf = emalloc(strlen(file) + 5);
+	strcpy(buf, file);
+	/* replace the extension of the last path element only */
+	p = strrchr(buf, '.');
+	if(p == nil || strchr(p, '/') != nil)
+		p = buf + strlen(buf);
+	strcpy(p, ".sav");
+	savefd = open(buf, ORDWR);
+	if(savefd < 0){
+		if(backup == NOBACK){
+			backup = romtype(&nback);
+			if(backup == NOBACK){
+				fprint(2, "failed to autodetect save format\n");
+				free(buf);
+				return;
+			}
+		}
+		savefd = create(buf, OWRITE, 0664);
+		if(savefd < 0){
+			fprint(2, "create: %r\n");
+			free(buf);
+			return;
+		}
+		memset(tstr, 0, sizeof(tstr));
+		typename(tstr, backup, nback);
+		write(savefd, tstr, sizeof(tstr));
+		back = emalloc(nback);
+		memset(back, 0, nback);
+		write(savefd, back, nback);
+		free(buf);
+		atexit(flushback);
+		return;
+	}
+	/* a short or failed read leaves an empty name, which parsetype rejects */
+	memset(tstr, 0, sizeof(tstr));
+	readn(savefd, tstr, sizeof(tstr));
+	tstr[sizeof(tstr)-1] = 0;
+	type = parsetype(tstr, &size);
+	if(type == NOBACK || backup != NOBACK && (type != backup || nback != size))
+		sysfatal("%s: invalid format", buf);
+	backup = type;
+	nback = size;
+	back = emalloc(nback);
+	readn(savefd, back, nback);
+	atexit(flushback);
+	free(buf);
+}
+
+/*
+ * loadrom reads the whole ROM file into rom/nrom and then sets up the
+ * save file with loadsave.  Empty files and files larger than 32 MB are
+ * rejected.  A ROM of exactly 32 MB used with EEPROM save memory has its
+ * last 256 bytes cut off.
+ */
+void
+loadrom(char *file)
+{
+	int fd;
+	vlong sz;
+	
+	fd = open(file, OREAD);
+	if(fd < 0)
+		sysfatal("open: %r");
+	sz = seek(fd, 0, 2);
+	/* exactly 32 MB must be accepted, or the EEPROM case below can never apply */
+	if(sz <= 0 || sz > 32*KB*KB)
+		sysfatal("nope.jpg");
+	seek(fd, 0, 0);
+	nrom = sz;
+	rom = emalloc(nrom);
+	if(readn(fd, rom, sz) < sz)
+		sysfatal("read: %r");
+	close(fd);
+	loadsave(file);
+	if(nrom == 32*KB*KB && backup == EEPROM)
+		nrom -= 256;
+}
+
+/*
+ * screeninit (re)computes picr, the centred scale*240 by scale*160
+ * picture rectangle, allocates the staging image tmp and the background
+ * colour bg, and clears the window.  It is called again after every
+ * resize, so images from the previous call are freed first.
+ */
+void
+screeninit(void)
+{
+	Point p;
+
+	if(tmp != nil)
+		freeimage(tmp);
+	if(bg != nil)
+		freeimage(bg);
+	p = divpt(addpt(screen->r.min, screen->r.max), 2);
+	picr = (Rectangle){subpt(p, Pt(scale * 120, scale * 80)), addpt(p, Pt(scale * 120, scale * 80))};
+	/* when scaling, tmp is one replicated pixel row; flush() draws it scale rows high */
+	tmp = allocimage(display, Rect(0, 0, scale * 240, scale > 1 ? 1 : scale * 160), CHAN4(CIgnore, 1, CBlue, 5, CGreen, 5, CRed, 5), scale > 1, 0);
+	bg = allocimage(display, Rect(0, 0, 1, 1), screen->chan, 1, 0xCCCCCCFF);
+	if(tmp == nil || bg == nil)
+		sysfatal("allocimage: %r");
+	draw(screen, screen->r, bg, nil, ZP);	
+}
+
+void	/* keyboard process: reads messages from /dev/kbd forever, updating keys, trace, paused and framestep */
+keyproc(void *)
+{
+	int fd, k;
+	static char buf[256];
+	char *s;
+	Rune r;
+
+	fd = open("/dev/kbd", OREAD);
+	if(fd < 0)
+		sysfatal("open: %r");
+	for(;;){
+		if(read(fd, buf, sizeof(buf) - 1) <= 0)
+			sysfatal("read /dev/kbd: %r");
+		if(buf[0] == 'c'){	/* 'c' message: handles Del (exit) and 't' (toggle tracing) */
+			/*if(utfrune(buf, KF|5))
+				savereq = 1;
+			if(utfrune(buf, KF|6))
+				loadreq = 1;*/
+			if(utfrune(buf, Kdel)){
+				close(fd);
+				threadexitsall(nil);
+			}
+			if(utfrune(buf, 't'))
+				trace = !trace;
+		}
+		if(buf[0] != 'k' && buf[0] != 'K')
+			continue;
+		s = buf + 1;
+		k = 0;	/* the key mask is rebuilt from scratch for every 'k'/'K' message */
+		while(*s != 0){
+			s += chartorune(&r, s);
+			switch(r){
+			case Kdel: close(fd); threadexitsall(nil);
+			case 'z': k |= 1<<1; break;
+			case 'x': k |= 1<<0; break;
+			case 'a': k |= 1<<9; break;
+			case 's': k |= 1<<8; break;
+			case Kshift: k |= 1<<2; break;
+			case 10: k |= 1<<3; break;
+			case Kup: k |= 1<<6; break;
+			case Kdown: k |= 1<<7; break;
+			case Kleft: k |= 1<<5; break;
+			case Kright: k |= 1<<4; break;
+			case Kesc:	/* toggle pause: pauselock is held for as long as the emulator is paused */
+				if(paused)
+					qunlock(&pauselock);
+				else
+					qlock(&pauselock);
+				paused = !paused;
+				break;
+			case KF|1:	/* toggle frame stepping; resumes first when paused */
+				if(paused){
+					qunlock(&pauselock);
+					paused=0;
+				}
+				framestep = !framestep;
+				break;
+			}
+		}
+		keys = k;
+	}
+
+}
+
+/*
+ * timing is called once per frame; every 60th call it redraws the
+ * speed figure in the top left corner of the window: 1e11 divided by
+ * the nanoseconds since the previous update, shown as a percentage.
+ */
+void
+timing(void)
+{
+	static int fcount;
+	static vlong old;
+	static char buf[32];
+	vlong now;
+
+	if(++fcount != 60)
+		return;
+	fcount = 0;
+	now = nsec();
+	buf[0] = 0;
+	if(now != old)
+		sprint(buf, "%6.2f%%", 1e11 / (now - old));
+	draw(screen, rectaddpt(Rect(10, 10, 200, 30), screen->r.min), bg, nil, ZP);
+	string(screen, addpt(screen->r.min, Pt(10, 10)), display->black, ZP, display->defaultfont, buf);
+	old = nsec();
+}
+
+void	/* present the frame in pic[], then do the per-frame chores: resize, profiling, frame stepping, delayed save writes and the key interrupt */
+flush(void)
+{
+	extern uchar pic[];
+	Mouse m;
+	int x;
+
+	if(nbrecvul(mc->resizec) > 0){
+		if(getwindow(display, Refnone) < 0)
+			sysfatal("resize failed: %r");
+		screeninit();
+	}
+	while(nbrecv(mc->c, &m) > 0)	/* drain and ignore mouse events */
+		;
+	if(scale == 1){
+		loadimage(tmp, tmp->r, pic, 240*160*2);
+		draw(screen, picr, tmp, nil, ZP);
+	} else {
+		Rectangle r;
+		uchar *s;
+		int w;
+
+		s = pic;
+		r = picr;
+		w = 240*2*scale;	/* bytes per row of pic when scaling; tmp holds exactly one such row */
+		while(r.min.y < picr.max.y){	/* load one row into tmp and draw it scale pixels high */
+			loadimage(tmp, tmp->r, s, w);
+			s += w;
+			r.max.y = r.min.y+scale;
+			draw(screen, r, tmp, nil, ZP);
+			r.min.y = r.max.y;
+		}
+	}
+	flushimage(display, 1);
+	if(profile)
+		timing();
+	if(framestep){	/* frame stepping: pause after this frame by taking pauselock */
+		paused = 1;
+		qlock(&pauselock);
+		framestep = 0;
+	}
+	
+	if(saveframes > 0 && --saveframes == 0)	/* countdown started by writeback() */
+		flushback();
+	
+	if((reg[KEYCNT] & 1<<14) != 0){	/* bit 14 set: key interrupt checking is on */
+		x = reg[KEYCNT] & keys;
+		if((reg[KEYCNT] & 1<<15) != 0){	/* bit 15 set: all keys selected in the low 10 bits must be down */
+			if(x == (reg[KEYCNT] & 0x3ff))
+				setif(IRQKEY);
+		}else	/* bit 15 clear: any selected key suffices */
+			if(x != 0)
+				setif(IRQKEY);
+	}
+}
+
+void
+usage(void)
+{
+	fprint(2, "usage: %s [-23T] [-s savetype] [-b biosfile] rom\n", argv0);
+	exits("usage");
+}
+
+void	/* program entry: parse options, load BIOS and ROM, set up the display and the keyboard process, then run the emulation loop forever */
+threadmain(int argc, char **argv)
+{
+	char *s;
+	int t;
+
+	scale = 1;
+	ARGBEGIN {
+	case '2':
+		scale = 2;
+		break;
+	case '3':
+		scale = 3;
+		break;
+	case 's':	/* force the save memory type instead of detecting it */
+		s = EARGF(usage());
+		backup = parsetype(s, &nback);
+		if(backup == NOBACK)
+			sysfatal("unknown save type '%s'", s);
+		break;
+	case 'b':
+		biosfile = strdup(EARGF(usage()));
+		break;
+	case 'T':	/* show the speed figure drawn by timing() */
+		profile++;
+		break;
+	default:
+		usage();
+	} ARGEND;
+	if(argc < 1)
+		usage();
+
+	loadbios();
+	loadrom(argv[0]);
+	
+	if(initdraw(nil, nil, nil) < 0)
+		sysfatal("initdraw: %r");
+	mc = initmouse(nil, screen);
+	if(mc == nil)
+		sysfatal("initmouse: %r");
+	proccreate(keyproc, nil, mainstacksize);
+	screeninit();
+	
+	memreset();
+	reset();
+	for(;;){
+		if(paused){	/* blocks here until whoever holds pauselock releases it */
+			qlock(&pauselock);
+			qunlock(&pauselock);
+		}
+		if(dmaact)	/* t: cycles consumed in this iteration */
+			t = dmastep();
+		else if(cpuhalt)
+			t = 8;
+		else
+			t = step();
+		ppuclock += t;
+		while(ppuclock >= 4){	/* ppustep runs once per 4 cycles */
+			ppustep();
+			ppuclock -= 4;
+		}
+		timerstep(t);
+	}
+}
--- /dev/null
+++ b/sys/src/games/gba/mem.c
@@ -1,0 +1,665 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include "dat.h"
+#include "fns.h"
+
+/* backing store for the address regions decoded by memread() and memwrite() */
+uchar bios[16*KB], wram0[32*KB], wram1[256*KB];
+uchar vram[96*KB];
+u16int pram[512], oam[512];
+uchar *rom, *back;
+int nrom, nback;
+/* I/O registers, indexed by byte offset / 2 */
+u16int reg[512];
+/* live timer counters; the reload values stay in reg[] */
+u16int tim[4];
+int timerclock;
+/* bit i set: DMA channel i has transfers pending for dmastep() */
+int dmaact;
+/* per-channel working registers: dmar[4*channel + one of these] */
+enum {
+	DMASRC,
+	DMADST,
+	DMACNT
+};
+u32int dmar[16];
+/*
+ * Cartridge access cost, indexed as in memread():
+ *	region (0-2; 3 is filled in alongside by the WAITCNT handler)
+ *	| sequential << 2 | 32-bit << 3.
+ * reg[] starts out zeroed, so the initial table has to be what the
+ * WAITCNT case of regwrite16() computes for a value of 0; the previous
+ * initializer had entries 6/7 and 10/11 swapped and left the
+ * sequential 32-bit entries (12-15) at 0 cycles.
+ */
+u8int waitst[16] = {5, 5, 5, 5, 3, 5, 9, 5, 8, 10, 14, 10, 6, 10, 18, 10};
+
+extern int cyc;
+
+static int eepromread(void);
+static void eepromwrite(int);
+static u8int flashread(u16int);
+static void flashwrite(u16int, u8int);
+
+/*
+ * Read a little-endian value of n bytes from the byte array at c.
+ * n is 2 or 4; any other value reads a single byte.
+ */
+static u32int
+arread(uchar *c, int n)
+{
+	switch(n){
+	default:
+		return c[0];
+	case 2:
+		return c[0] | c[1] << 8;
+	case 4:
+		/* widen first: c[3] << 24 evaluated as int overflows once bit 7 is set */
+		return c[0] | c[1] << 8 | c[2] << 16 | (u32int)c[3] << 24;
+	}
+}
+
+/*
+ * Store the low n bytes of v at c in little-endian order.
+ * n is 2 or 4; any other value stores a single byte.
+ */
+static void
+arwrite(uchar *c, u32int v, int n)
+{
+	c[0] = v;
+	if(n == 2 || n == 4)
+		c[1] = v >> 8;
+	if(n == 4){
+		c[2] = v >> 16;
+		c[3] = v >> 24;
+	}
+}
+
+/*
+ * Read from an array of 16-bit cells.  For n == 4 two consecutive
+ * cells are combined, low half first.  For n == 1 the cell is shifted
+ * right by 8 when h is set so that the addressed byte ends up in the
+ * low bits; the upper bits are not masked off.  Any other n returns
+ * the cell itself.
+ */
+static u32int
+ar16read(u16int *c, int h, int n)
+{
+	switch(n){
+	case 1:
+		return c[0] >> (h << 3);
+	default:
+		return c[0];
+	case 4:
+		/* widen first: c[1] << 16 evaluated as int overflows once bit 15 is set */
+		return c[0] | (u32int)c[1] << 16;
+	}
+}
+
+/*
+ * Store v into an array of 16-bit cells.  n == 4 writes two cells,
+ * n == 2 one cell, and n == 1 replaces only the high (h set) or low
+ * (h clear) byte of the cell.  Other sizes are ignored.
+ */
+static void
+ar16write(u16int *c, int h, u32int v, int n)
+{
+	u16int b;
+
+	if(n == 4){
+		c[0] = v;
+		c[1] = v >> 16;
+	}else if(n == 2)
+		c[0] = v;
+	else if(n == 1){
+		b = (u8int)v;
+		if(h)
+			c[0] = c[0] & 0xff | b << 8;
+		else
+			c[0] = c[0] & 0xff00 | b;
+	}
+}
+
+/*
+ * Return the value seen when reading the I/O register at byte offset a.
+ * A few registers are synthesized from live emulator state (beam
+ * position, timer counters, key state); everything else is returned
+ * as stored in reg[].
+ */
+static u32int
+regread(u32int a)
+{
+	u32int st;
+
+	if(a == DISPSTAT*2){
+		/* the low three bits are flags derived from ppux/ppuy */
+		st = reg[a/2] & ~7;
+		if(ppuy >= 160 && ppuy != 227)
+			st |= 1;
+		if(ppux >= 240)
+			st |= 2;
+		/* bits 8 and up hold the line number to compare against */
+		if(ppuy == st >> 8)
+			st |= 4;
+		return st;
+	}
+	if(a == 0x006)
+		return ppuy;
+	/* 0x100, 0x104, 0x108, 0x10c: current counter of timer 0-3 */
+	if(a >= 0x100 && a <= 0x10c && (a & 3) == 0)
+		return tim[(a - 0x100) / 4];
+	if(a == 0x130)
+		return keys ^ 0x3ff;
+	return reg[a/2];
+}
+
+/*
+ * Perform a 16-bit write of v to the I/O register at byte offset a,
+ * applying the side effects of the registers that have any, and then
+ * store the value in reg[].  regwrite() also passes odd offsets for
+ * byte writes; those only match the 0x301 case.
+ */
+static void
+regwrite16(u32int a, u16int v)
+{
+	u16int *p;
+	int i;
+	static u8int ws0[4] = {5,4,3,9};
+	
+	p = &reg[a/2];
+	switch(a){
+	case IF*2:
+		/* writing a 1 clears the corresponding pending bit */
+		*p &= ~v;
+		setif(0);
+		return;
+	case BLDALPHA*2:
+		/* both coefficients are clamped to 16 */
+		blda = v & 0x1f;
+		if(blda > 16)
+			blda = 16;
+		bldb = v >> 8 & 0x1f;
+		if(bldb > 16)
+			bldb = 16;
+		break;
+	case BLDY*2:
+		bldy = v & 0x1f;
+		if(bldy > 16)
+			bldy = 16;
+		break;
+	case DMA0CNTH*2: case DMA1CNTH*2: case DMA2CNTH*2: case DMA3CNTH*2:
+		/* enable bit going 0 -> 1: latch source, destination and count */
+		if((*p & DMAEN) == 0 && (v & DMAEN) != 0){
+			i = (a - DMA0CNTH*2) / 12;
+			if((v >> DMAWHEN & 3) == 0)
+				dmaact |= 1<<i;
+			if(i == 3 && (v >> DMAWHEN & 3) == 3)
+				print("DMA video capture mode\n");
+			/* widen before shifting so bit 15 of the high half cannot overflow int */
+			dmar[4*i + DMASRC] = p[-5] | (u32int)p[-4] << 16;
+			dmar[4*i + DMADST] = p[-3] | (u32int)p[-2] << 16;
+			dmar[4*i + DMACNT] = p[-1];
+		}
+		break;
+	case 0x102: case 0x106: case 0x10a: case 0x10e:
+		/* timer enable bit going 0 -> 1: load the counter from the reload value */
+		if((*p & 1<<7) == 0 && (v & 1<<7) != 0)
+			tim[(a-0x102)/4] = p[-1];
+		break;
+	case IME*2: case IE*2:
+		/*
+		 * Store before calling setif(): it recomputes irq from reg[IME]
+		 * and reg[IE], and used to see the value being replaced.
+		 */
+		*p = v;
+		setif(0);
+		return;
+	case WAITCNT*2:
+		/* rebuild the cartridge access cost table used by memread() */
+		waitst[3] = waitst[7] = ws0[v & 3];
+		waitst[0] = ws0[v >> 2 & 3];
+		waitst[4] = ((v & 1<<4) == 0) + 2;
+		waitst[1] = ws0[v >> 5 & 3];
+		waitst[5] = (v & 1<<7) == 0 ? 5 : 2;
+		waitst[2] = ws0[v >> 8 & 3];
+		waitst[6] = (v & 1<<10) == 0 ? 9 : 2;
+		/* a 32-bit access costs the 16-bit access plus one sequential access */
+		for(i = 0; i < 8; i++)
+			waitst[8 + i] = waitst[i] + waitst[i | 4];
+		break;
+	case 0x301:
+		cpuhalt = 1;
+		break;
+	}
+	*p = v;
+}
+
+/*
+ * Write n (1, 2 or 4) bytes of v to the I/O register area at byte
+ * offset a by turning the access into one or two regwrite16() calls.
+ * 16 and 32-bit writes to an odd offset are fatal.
+ */
+static void
+regwrite(u32int a, u32int v, int n)
+{
+	u16int w;
+
+	switch(n){
+	case 1:
+		/*
+		 * IF is write-1-to-clear, so a byte write must only present
+		 * the written byte and zeros for the other half.  a is a byte
+		 * offset while IF is a reg[] index, hence IF*2; the old test
+		 * against IF itself could never match.
+		 */
+		if((a & ~1) == IF*2){
+			w = (u8int)v;
+			if((a & 1) != 0)
+				w <<= 8;
+			regwrite16(IF*2, w);
+			break;
+		}
+		/* read-modify-write of the containing 16-bit register */
+		w = regread(a);
+		if((a & 1) == 0)
+			w = w & 0xff00 | (u8int)v;
+		else
+			w = w & 0xff | v << 8;
+		regwrite16(a, w);
+		break;
+	default:
+		if((a & 1) != 0)
+			sysfatal("unaligned register access");
+		regwrite16(a, v);
+		break;
+	case 4:
+		if((a & 1) != 0)
+			sysfatal("unaligned register access");
+		regwrite16(a, v);
+		regwrite16(a + 2, v >> 16);
+		break;
+	}
+}
+
+/*
+ * Raise the interrupt request bits in v (0 just re-evaluates) and
+ * recompute irq, the line sampled by the CPU core.  irq needs the
+ * master enable in IME, but a halted CPU is resumed as soon as any
+ * enabled request is pending, whether or not IME is set.
+ */
+void
+setif(u16int v)
+{
+	u16int pend;
+
+	reg[IF] |= v;
+	pend = reg[IF] & reg[IE];
+	irq = (reg[IME] & 1) != 0 && pend != 0;
+	if(pend != 0)
+		cpuhalt = 0;
+}
+
+/*
+ * Read n (1, 2 or 4) bytes from bus address a, which must be aligned
+ * to n.  The region is selected by a >> 24 and the cost of the access
+ * is added to cyc.  seq is only used for cartridge ROM, where it
+ * selects the sequential instead of the non-sequential table entry.
+ * A read from an address without a case is fatal.
+ */
+u32int
+memread(u32int a, int n, int seq)
+{
+	u32int b;
+	assert((a & n-1) == 0);
+
+	switch(a >> 24){
+	case 0:
+		b = a & sizeof(bios) - 1;
+		cyc++;
+		return arread(bios + b, n);
+	case 2:
+		b = a & sizeof(wram1) - 1;
+		cyc += n > 2 ? 6 : 3;
+		return arread(wram1 + b, n);
+	case 3:
+		b = a & sizeof(wram0) - 1;
+		cyc++;
+		return arread(wram0 + b, n);
+	case 4:
+		b = a & 0xffffff;
+		if(b >= sizeof(reg)) goto fault;
+		cyc++;
+		if(n == 4)
+			return regread(b) | regread(b+2) << 16;
+		/*
+		 * Registers are 16 bits wide: a byte read from an odd address
+		 * has to return the high half, not the whole register.
+		 */
+		if(n == 1 && (b & 1) != 0)
+			return regread(b - 1) >> 8;
+		return regread(b);
+	case 5:
+		b = a & sizeof(pram) - 1;
+		cyc += (n+1) >> 1;
+		return ar16read(pram + b/2, b & 1, n);
+	case 6:
+		/* offsets of 64K and above have the 32K bit cleared, folding them into the 96K array */
+		b = a & 128*KB - 1;
+		if(b >= 64*KB)
+			b &= ~(32*KB);
+		cyc += (n+1) >> 1;
+		return arread(vram + b, n);
+	case 7:
+		b = a & sizeof(oam) - 1;
+		cyc++;
+		return ar16read(oam + b/2, b & 1, n);
+	case 8: case 9: case 10: case 11: case 12: case 13:
+		b = a & 0x1ffffff;
+		/* table index: region | sequential << 2 | 32-bit << 3 */
+		cyc += waitst[(a >> 25) - 4 | seq << 2 | (n > 2) << 3];
+		if(b >= nrom){
+			/* past the end of the ROM image: EEPROM if configured, otherwise 0 */
+			if(backup == EEPROM && b >= 0x1000000 && (nrom < 16*KB*KB || b >= 0x1ffff00))
+				return eepromread();
+			return 0;
+		}
+		return arread(rom + b, n);
+	case 14:
+		if(backup == SRAM){
+			b = a & nback - 1;
+			return arread(back + b, n);
+		}
+		if(backup == FLASH)
+			return flashread(a);
+		return 0;
+	default:
+	fault:
+		sysfatal("read from %#.8ux (pc=%#.8ux)", a, curpc);
+		return 0;
+	}
+}
+
+/*
+ * Write the low n (1, 2 or 4) bytes of v to bus address a, which must
+ * be aligned to n.  The region is selected by a >> 24; the cost of the
+ * access is added to cyc for the regions that account for it here.
+ * A write to an address without a case is fatal.
+ */
+void
+memwrite(u32int a, u32int v, int n)
+{
+	u32int b;
+	assert((a & n-1) == 0);
+
+	switch(a >> 24){
+	case 0:
+		/* writes to the BIOS region are ignored */
+		return;
+	case 2:
+		b = a & sizeof(wram1) - 1;
+		cyc += n > 2 ? 6 : 3;
+		arwrite(wram1 + b, v, n);
+		return;
+	case 3:
+		b = a & sizeof(wram0) - 1;
+		cyc++;
+		arwrite(wram0 + b, v, n);
+		return;
+	case 4:
+		cyc++;
+		b = a & 0xffffff;
+		/* offset 0x410 is silently ignored instead of faulting */
+		if(b == 0x410) return;
+		if(b >= sizeof(reg)) goto fault;
+		regwrite(b, v, n);
+		return;
+	case 5:
+		b = a & sizeof(pram) - 1;
+		cyc += (n+1) >> 1;
+		ar16write(pram + b/2, b & 1, v, n);
+		return;
+	case 6:
+		/* same folding of the 128K window into the 96K array as in memread() */
+		b = a & 128*KB - 1;
+		if(b >= 64*KB)
+			b &= ~(32*KB);
+		cyc += (n+1) >> 1;
+		arwrite(vram + b, v, n);
+		return;
+	case 7:
+		b = a & sizeof(oam) - 1;
+		cyc++;
+		ar16write(oam + b/2, b & 1, v, n);
+		return;
+	case 8: case 9: case 10: case 11: case 12: case 13:
+		/* the only effect of a write to the ROM area is to clock bit 0 into the EEPROM */
+		if(backup == EEPROM){
+			b = a & 0x01ffffff;
+			if(b >= 0x1000000 && (nrom < 16*KB*KB || b >= 0x1ffff00))
+				eepromwrite(v & 1);
+		}
+		return;
+	case 14:
+		if(backup == SRAM){
+			b = a & nback - 1;
+			arwrite(back + b, v, n);
+			writeback();
+			return;
+		}
+		if(backup == FLASH){
+			flashwrite(a, v);
+			return;
+		}
+		return;
+	default:
+	fault:
+		sysfatal("write to %#.8ux, value %#.8ux (pc=%#.8ux)", a, v, curpc);
+	}
+}
+
+/*
+ * Set up the non-zero power-on I/O register state: the register at
+ * byte offset 0x88 starts out as 0x200.
+ * NOTE(review): 0x88 is presumably the sound bias register - confirm.
+ */
+void
+memreset(void)
+{
+	reg[0x88/2] = 0x200;
+}
+
+/*
+ * Advance the four timers by t CPU cycles.
+ *
+ * For every enabled timer (control bit 7) the number of ticks in this
+ * step is either the number of prescaler periods (1, 64, 256 or 1024
+ * cycles, control bits 0-1) completed between the old and the new
+ * clock value, or, with control bit 2 set, the number of overflows of
+ * the preceding timer in this same step.  On overflow the counter
+ * restarts from the reload value and, with control bit 6 set, the
+ * timer's interrupt is requested.
+ *
+ * The previous version only ticked a prescaled timer when the clock
+ * happened to land exactly on a multiple of the period, added t rather
+ * than one tick per period or per cascaded overflow, and treated
+ * t == 0 as an overflow.
+ */
+void
+timerstep(int t)
+{
+	static u8int presc[4] = {0, 6, 8, 10};
+	int i, sh;
+	u16int c;
+	u32int ticks, ovf, n;
+
+	ovf = 0;
+	for(i = 0; i < 4; i++){
+		c = reg[0x102/2 + i*2];
+		if((c & 1<<7) == 0){
+			/* a stopped timer feeds no overflows to the next one */
+			ovf = 0;
+			continue;
+		}
+		if((c & 1<<2) != 0)
+			ticks = ovf;
+		else{
+			/* periods completed = (phase within the period + t) / period */
+			sh = presc[c & 3];
+			ticks = ((timerclock & (1<<sh)-1) + t) >> sh;
+		}
+		ovf = 0;
+		n = tim[i] + ticks;
+		/* each pass removes one wrap; it ends because the reload is below 0x10000 */
+		while(n > 0xffff){
+			n = n - 0x10000 + reg[0x100/2 + i*2];
+			ovf++;
+		}
+		tim[i] = n;
+		if(ovf != 0 && (c & 1<<6) != 0)
+			setif(IRQTIM0 << i);
+	}
+	/* unsigned addition: the free-running clock is allowed to wrap */
+	timerclock = (u32int)timerclock + t;
+}
+
+/*
+ * Transfer one unit (16 or 32 bits) for the lowest-numbered active DMA
+ * channel and return the cycles spent, as counted in cyc by memread()
+ * and memwrite(); returns 0 when no channel is active.  When the
+ * channel's count runs out it is deactivated, then either re-armed
+ * (repeat bit) or disabled, and its interrupt is requested if enabled.
+ */
+int
+dmastep(void)
+{
+	int i;
+	u16int *cntp, cnt;
+	u32int *dr;
+	u32int v;
+	int sz;
+	
+	cyc = 0;
+	for(i = 0; i < 4; i++)
+		if((dmaact & 1<<i) != 0)
+			break;
+	if(i == 4)
+		return cyc;
+	/* no instruction is responsible for these accesses */
+	curpc = -1;
+	cntp = reg + DMA0CNTH + i * 6;
+	cnt = *cntp;
+	dr = dmar + 4 * i;
+
+	sz = (cnt & DMAWIDE) != 0 ? 4 : 2;
+	/* channel 0 sources and channel 0-2 destinations are 27 bits wide, the rest 28 */
+	if(i == 0)
+		dr[DMASRC] &= 0x07FFFFFF;
+	else
+		dr[DMASRC] &= 0x0FFFFFFF;
+	if(i != 3)
+		dr[DMADST] &= 0x07FFFFFF;	/* was 0x7FFFFFFF, which let unmapped addresses through */
+	else
+		dr[DMADST] &= 0x0FFFFFFF;
+	/* & -sz forces the alignment that memread()/memwrite() assert */
+	v = memread(dr[DMASRC] & -sz, sz, 1);
+	memwrite(dr[DMADST] & -sz, v, sz);
+	switch(cnt >> DMADCNT & 3){
+	case DMAINC: case DMAINCREL: dr[DMADST] += sz; break;
+	case DMADEC: dr[DMADST] -= sz; break;
+	}
+	switch(cnt >> DMASCNT & 3){
+	case DMAINC: dr[DMASRC] += sz; break;
+	case DMADEC: dr[DMASRC] -= sz; break;
+	}
+	/* a count of 0 stands for the maximum length */
+	if(dr[DMACNT] == 0)
+		dr[DMACNT] = i != 3 ? 0x4000 : 0x10000;
+	if(--dr[DMACNT] == 0){
+		dmaact &= ~(1<<i);
+		if((cnt & DMAREP) != 0){
+			/*
+			 * Re-arm this channel's own working registers (dr); the
+			 * reload used to go to dmar[], i.e. always to channel 0.
+			 */
+			dr[DMACNT] = cntp[-1];
+			if((cnt >> DMADCNT & 3) == DMAINCREL)
+				dr[DMADST] = cntp[-3] | (u32int)cntp[-2] << 16;
+		}else
+			*cntp &= ~DMAEN;
+		if((cnt & DMAIRQ) != 0)
+			setif(IRQDMA0 << i);
+	}
+	return cyc;
+}
+
+/*
+ * Mark as active every enabled DMA channel whose start timing matches
+ * cond.  Timing value 3 is channel specific and is mapped to
+ * 3 + (channel + 1) / 2 before the comparison.
+ */
+void
+dmastart(int cond)
+{
+	int i;
+	u16int *cntp, cnt, c;
+	
+	cntp = reg + DMA0CNTH;
+	/* all four channels: the loop used to stop at 2, so channel 3 was never triggered */
+	for(i = 0; i < 4; i++, cntp += 6){
+		cnt = *cntp;
+		if((cnt & DMAEN) == 0)
+			continue;
+		c = cnt >> DMAWHEN & 3;
+		if(c == 3)
+			c += (i + 1) / 2;
+		if(c == cond)
+			dmaact |= 1<<i;
+	}
+}
+
+/*
+ * Serial EEPROM state machine: current state, number of bits handled
+ * in that state, the address being assembled, and the 64-bit data
+ * word being shifted in or out.
+ */
+int eepromstate, eeprompos, eepromaddr;
+u64int eepromdata;
+
+/* values of eepromstate */
+enum {
+	EEPROMCMD,	/* collecting the 2-bit command */
+	EEPROMRDCMD,	/* collecting the address of a read */
+	EEPROMRDRESP,	/* shifting read data out through eepromread() */
+	EEPROMWRCMD,	/* collecting the address of a write */
+	EEPROMWRDATA,	/* collecting the 64 data bits of a write */
+	EEPROMWRRESP,	/* write stored; eepromread() reports completion */
+};
+
+/*
+ * Return the next bit presented by the EEPROM.
+ * While a read response is in progress the first four reads return 0
+ * and the following 64 return the data word, most significant bit
+ * first; the 68th read also returns the state machine to EEPROMCMD.
+ * After a write, 0 is returned until the 1000th read, which returns 1
+ * and goes back to EEPROMCMD.  In every other state the result is 0.
+ */
+static int
+eepromread(void)
+{
+	int bit;
+
+	if(eepromstate == EEPROMWRRESP){
+		if(++eeprompos != 1000)
+			return 0;
+		eepromstate = EEPROMCMD;
+		eeprompos = 0;
+		return 1;
+	}
+	if(eepromstate != EEPROMRDRESP)
+		return 0;
+	if(++eeprompos <= 4)
+		return 0;
+	if(eeprompos == 68){
+		eepromstate = EEPROMCMD;
+		eeprompos = 0;
+	}
+	bit = eepromdata >> 63;
+	eepromdata <<= 1;
+	return bit;
+}
+
+/*
+ * Clock one bit (n is 0 or 1) into the EEPROM state machine.
+ *
+ * Two command bits select a write (binary 10) or a read (binary 11).
+ * They are followed by a 6-bit address when nback is 512 and a 14-bit
+ * address otherwise, in units of 8 bytes.  The bit after the address
+ * is the terminating bit of a read request or the first data bit of a
+ * write.  A write stores its 64 data bits when the bit following them
+ * arrives and then calls writeback().
+ */
+static void
+eepromwrite(int n)
+{
+	uchar *p;
+
+	switch(eepromstate){
+	case EEPROMCMD:
+		eepromaddr = eepromaddr << 1 | n;
+		if(++eeprompos >= 2){
+			switch(eepromaddr & 3){
+			case 2:
+				eepromstate = EEPROMWRCMD;
+				break;
+			case 3:
+				eepromstate = EEPROMRDCMD;
+				break;
+			}
+			eeprompos = 0;
+		}
+		break;
+	case EEPROMRDCMD:
+	case EEPROMWRCMD:
+		eepromaddr = eepromaddr << 1 | n;
+		eeprompos++;
+		/* one bit more than the address width has arrived: drop it again */
+		if(nback == 512){
+			if(eeprompos >= 7)
+				eepromaddr = eepromaddr >> 1 & 0x3f;
+			else
+				break;
+		}else{
+			/*
+			 * An 8 KB part has 1024 cells, so only the low 10 of the
+			 * 14 address bits are significant; keeping all 14 allowed
+			 * accesses far beyond the end of back[].
+			 */
+			if(eeprompos >= 15)
+				eepromaddr = eepromaddr >> 1 & 0x3ff;
+			else
+				break;
+		}
+		if(eepromstate == EEPROMRDCMD){
+			p = back + eepromaddr * 8;
+			/*
+			 * Every byte is widened before shifting: p[3] << 24 as
+			 * int turns negative for bytes >= 0x80 and used to
+			 * sign-extend into the upper 32 bits of the word.
+			 */
+			eepromdata = (u64int)p[0] | (u64int)p[1] << 8 | (u64int)p[2] << 16 |
+				(u64int)p[3] << 24 | (u64int)p[4] << 32 | (u64int)p[5] << 40 |
+				(u64int)p[6] << 48 | (u64int)p[7] << 56;
+			eeprompos = 0;
+			eepromstate = EEPROMRDRESP;
+			break;
+		}else{
+			/* the bit just received already was the first data bit */
+			eepromdata = n;
+			eeprompos = 1;
+			eepromstate = EEPROMWRDATA;
+			break;
+		}
+	case EEPROMWRDATA:
+		if(eeprompos == 64){
+			p = back + eepromaddr * 8;
+			p[0] = eepromdata;
+			p[1] = eepromdata >> 8;
+			p[2] = eepromdata >> 16;
+			p[3] = eepromdata >> 24;
+			p[4] = eepromdata >> 32;
+			p[5] = eepromdata >> 40;
+			p[6] = eepromdata >> 48;
+			p[7] = eepromdata >> 56;
+			eepromstate = EEPROMWRRESP;
+			eeprompos = 0;
+			writeback();
+			break;
+		}
+		eepromdata = eepromdata << 1 | n;
+		eeprompos++;
+		break;
+	}
+}
+
+/* flash command decoder state, mode flags and currently selected 64K bank */
+int flashstate, flashmode, flashbank;
+
+enum {
+	/* values of flashstate */
+	FLASHCMD0,
+	FLASHCMD1,
+	FLASHCMD2,
+	FLASHBANK,
+	FLASHWRITE,
+	
+	/* bits in flashmode */
+	FLASHID = 1,
+	FLASHERASE = 2,
+};
+
+/*
+ * Read one byte from the flash chip at offset a.  In ID mode the chip
+ * answers 0xbf at even and 0xd4 at odd offsets; otherwise the byte
+ * comes from the selected 64K bank of back[].
+ */
+static u8int
+flashread(u16int a)
+{
+	if((flashmode & FLASHID) == 0)
+		return back[(flashbank << 16) + a];
+	if((a & 1) == 0)
+		return 0xbf;
+	return 0xd4;
+}
+
+/*
+ * Feed one byte write (value v at offset a) to the flash command
+ * decoder.  A command is the sequence 0xaa at 0x5555, 0x55 at 0x2aaa
+ * and then the command byte; bank select and byte program take one
+ * further write.
+ */
+static void
+flashwrite(u16int a, u8int v)
+{
+	int erase;
+
+	switch(flashstate){
+	case FLASHCMD0:
+		if(a == 0x5555 && v == 0xaa)
+			flashstate = FLASHCMD1;
+		break;
+	case FLASHCMD1:
+		if(a == 0x2aaa && v == 0x55)
+			flashstate = FLASHCMD2;
+		else
+			flashstate = FLASHCMD0;
+		break;
+	case FLASHCMD2:
+		flashstate = FLASHCMD0;
+		/* erase mode only lasts for the command immediately following 0x80 */
+		erase = flashmode & FLASHERASE;
+		flashmode &= ~FLASHERASE;
+		switch(v){
+		case 0x90: flashmode |= FLASHID; break;
+		case 0xF0: flashmode &= ~FLASHID; break;
+		case 0x80: flashmode |= FLASHERASE; break;
+		case 0x10:
+			/* erase everything */
+			if(erase){
+				memset(back, 0xff, nback);
+				writeback();
+			}
+			break;
+		case 0x30:
+			/* erase the 4K sector containing a in the current bank */
+			if(erase){
+				memset(back + (a & 0xf000) + (flashbank << 16), 0xff, 4096);
+				writeback();
+			}
+			break;
+		case 0xA0:
+			writeback();
+			flashstate = FLASHWRITE;
+			break;
+		case 0xB0: flashstate = FLASHBANK; break;
+		default:
+			print("unknown flash cmd %x\n", v);
+		}
+		break;
+	case FLASHBANK:
+		/* NOTE(review): divides by nback >> 16, which is 0 if nback is below 64K */
+		flashbank = v % (nback >> 16);
+		flashstate = FLASHCMD0;
+		break;
+	case FLASHWRITE:
+		/* programming can only clear bits */
+		back[(flashbank << 16) + a] &= v;
+		writeback();
+		flashstate = FLASHCMD0;
+		break;
+	}
+}
+
--- /dev/null
+++ b/sys/src/games/gba/mkfile
@@ -1,0 +1,13 @@
+</$objtype/mkfile
+
+# builds the binary "gba" for installation in the games directory
+BIN=/$objtype/bin/games
+TARG=gba
+# objects linked into the emulator; test.c is not listed here
+OFILES=\
+	cpu.$O\
+	mem.$O\
+	gba.$O\
+	ppu.$O\
+
+HFILES=dat.h fns.h
+
+</sys/src/cmd/mkone
--- /dev/null
+++ b/sys/src/games/gba/ppu.c
@@ -1,0 +1,653 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include "dat.h"
+#include "fns.h"
+
+/* current beam position: column and line */
+int ppux, ppuy;
+/* output picture, 2 bytes per pixel, filled in by pixeldraw() */
+uchar pic[240*160*2*3*3];
+/* blend coefficients, set by regwrite16() and clamped there to 16 */
+u8int bldy, blda, bldb;
+
+/* per-background rendering state */
+typedef struct bg bg;
+struct bg {
+	uchar n;	/* background number */
+	uchar depth;	/* bitmap modes: nonzero for 16-bit pixels */
+
+	/* reference point: value for the start of the line and running value */
+	s32int rpx0, rpy0, rpx, rpy;
+	/* bitmap size, shifted left by 8 */
+	s32int sx, sy;
+	
+	/* text modes: current tile, pixel within the tile, cached map entry and pointers */
+	u16int tx, ty;
+	u8int tnx, tny;
+	u16int t;
+	u8int *chr;
+	u16int *pal;
+};
+static u8int mode=-1;
+static bg bgst[4] = {{.n = 0}, {.n = 1}, {.n = 2}, {.n = 3}};
+/* the two frontmost pixels found so far: colour | layer << 16, and their priorities */
+static u32int pixeldat[2], pixelpri[2];
+/* layer enable mask for the current pixel, computed by windows() */
+static u16int bgmask;
+static u8int objwin, objtrans;
+
+/* per-line state of a sprite that intersects the current line */
+typedef struct sprite sprite;
+struct sprite {
+	uchar w, wb, h;
+	s16int x;
+	uchar ysh;
+	
+	uchar *base;
+	u16int *pal;
+	u16int inc;
+
+	/* t0: OAM attributes 0 and 1 combined; t1: attribute 2 */
+	u32int t0;
+	u16int t1;
+	uchar depth;
+	
+	/* rotation/scaling: running texture coordinate and per-pixel increment */
+	s32int rx, ry;
+	s16int dx, dy;
+};
+/* sprites of the current line are sprt[0] up to, not including, sp */
+static sprite sprt[128], *sp = sprt;
+/* bits in sprite.t0 */
+enum {
+	SPRROT = 1<<8,
+	SPRDIS = 1<<9,
+	SPRDOUB = 1<<9,	/* same bit as SPRDIS; meaning depends on SPRROT */
+	SPR8 = 1<<13,
+	SPRWIDE = 1<<14,
+	SPRTALL = 1<<15,
+	SPRHFLIP = 1<<28,
+	SPRVFLIP = 1<<29,
+	SPRSIZE0 = 1<<30,
+	SPRSIZE1 = 1<<31
+};
+
+/*
+ * Store colour v for screen position (x, y) in pic, low byte first.
+ * With scale 2 or 3 the pixel is repeated that many times horizontally
+ * and rows are correspondingly wider; the rows themselves are not
+ * duplicated here.
+ */
+void
+pixeldraw(int x, int y, u16int v)
+{
+	uchar *p, lo, hi;
+	int i, rep;
+
+	/* any scale other than 1 or 2 is handled as 3 */
+	rep = scale == 1 ? 1 : scale == 2 ? 2 : 3;
+	lo = v;
+	hi = v >> 8;
+	p = pic + (x + y * 240) * rep * 2;
+	for(i = 0; i < rep; i++){
+		*p++ = lo;
+		*p++ = hi;
+	}
+}
+
+/*
+ * Offer colour c from layer n with priority p for the current pixel.
+ * The two entries with the lowest priority values are kept, frontmost
+ * in slot 0; on equal priority the earlier candidate stays in front.
+ */
+void
+pixel(u16int c, int n, int p)
+{
+	u32int d;
+
+	d = c | n << 16;
+	if(p < pixelpri[0]){
+		/* new frontmost pixel: the old one becomes second */
+		pixelpri[1] = pixelpri[0];
+		pixeldat[1] = pixeldat[0];
+		pixelpri[0] = p;
+		pixeldat[0] = d;
+		return;
+	}
+	if(p < pixelpri[1]){
+		pixelpri[1] = p;
+		pixeldat[1] = d;
+	}
+}
+
+/*
+ * Look up the map entry of text background b at tile coordinates
+ * (b->tx, b->ty) and cache it in b->t, together with a pointer to the
+ * current pixel row of its character data (b->chr) and the palette to
+ * use (b->pal).
+ */
+void
+tile(bg *b)
+{
+	u16int bgcnt, ta, tx, ty, y, t;
+	u8int d;
+	u8int *chr;
+	
+	bgcnt = reg[BG0CNT + b->n];
+	/* d = 1 doubles the size of a character row and of a character below */
+	d = bgcnt >> 7 & 1;
+	tx = b->tx;
+	ty = b->ty;
+	/* map base plus the entry within a 32x32 block, 2 bytes per entry */
+	ta = (bgcnt << 3 & 0xf800) + ((tx & 0x1f) << 1) + ((ty & 0x1f) << 6);
+	/* larger maps: bit 5 of tx and/or ty selects a further 0x800-byte block */
+	switch(bgcnt >> 14){
+	case 1: ta += tx << 6 & 0x800; break;
+	case 2: ta += ty << 6 & 0x800; break;
+	case 3: ta += tx << 6 & 0x800 | ty << 7 & 0x1000; break;
+	}
+	t = vram[ta] | vram[ta+1] << 8;
+	b->t = t;
+	chr = vram + (bgcnt << 12 & 0xc000) + ((t & 0x3ff) << 5+d);
+	y = b->tny;
+	/* bit 11 of the entry flips the tile vertically */
+	if((t & 1<<11) != 0)
+		y ^= 7;
+	chr = chr + (y << 2+d);
+	b->chr = chr;
+	/* with d set the whole palette is used, otherwise the 16-entry bank from bits 12-15 */
+	if(d != 0)
+		b->pal = pram;
+	else
+		b->pal = pram + (t >> 8 & 0xf0);
+}
+
+/*
+ * Prepare background b for the line ppuy.
+ * With scal set, the reference point is latched from the registers on
+ * line 0, copied into the running position for this line and advanced
+ * for the next one; for display modes 3-5 the bitmap size and pixel
+ * depth are set as well.  Without scal, the scroll registers are turned
+ * into a starting tile and a pixel offset and the first tile is
+ * fetched.  The bit argument is not used.
+ */
+void
+bginit(bg *b, int scal, int bit)
+{
+	u16int cnt, x, y;
+	u16int *rr;
+	
+	cnt = reg[DISPCNT];
+	if(scal){
+		/* rr offsets the BG2 register names to background b->n (8 registers apart) */
+		rr = reg + (b->n - 2 << 3);
+		if(ppuy == 0){
+			/* << 4 >> 4 sign-extends the 28-bit register value */
+			b->rpx0 = (s32int)(rr[BG2XL] | rr[BG2XH] << 16) << 4 >> 4;
+			b->rpy0 = (s32int)(rr[BG2YL] | rr[BG2YH] << 16) << 4 >> 4;
+		}
+		b->rpx = b->rpx0;
+		b->rpy = b->rpy0;
+		b->rpx0 += (s16int)rr[BG2PB];
+		b->rpy0 += (s16int)rr[BG2PD];
+		switch(cnt & 7){
+		case 3:
+		case 4:
+			b->sx = 240 << 8;
+			b->sy = 160 << 8;
+			b->depth = (cnt & 7) == 3;
+			break;
+		case 5:
+			b->sx = 160 << 8;
+			b->sy = 128 << 8;
+			b->depth = 1;
+			break;
+		}
+	}else{
+		/* rr offsets the BG0 scroll register names to background b->n (2 registers apart) */
+		rr = reg + (b->n << 1);
+		x = rr[BG0HOFS] & 0x1ff;
+		y = (rr[BG0VOFS] & 0x1ff) + ppuy;
+		b->tx = x >> 3;
+		b->ty = y >> 3;
+		b->tnx = x & 7;
+		b->tny = y & 7;
+		tile(b);
+	}
+}
+
+/*
+ * Latch the display mode from DISPCNT and prepare the backgrounds
+ * that exist in that mode for the coming line: four text layers in
+ * mode 0, two text layers and one scaled layer in mode 1, two scaled
+ * layers in mode 2 and the single bitmap layer in modes 3-5.
+ */
+void
+bgsinit(void)
+{
+	int i;
+
+	mode = reg[DISPCNT] & 7;
+	if(mode == 0){
+		for(i = 0; i < 4; i++)
+			bginit(&bgst[i], 0, 0);
+	}else if(mode == 1){
+		bginit(&bgst[0], 0, 0);
+		bginit(&bgst[1], 0, 0);
+		bginit(&bgst[2], 1, 0);
+	}else if(mode == 2){
+		bginit(&bgst[2], 1, 0);
+		bginit(&bgst[3], 1, 0);
+	}else if(mode <= 5)
+		bginit(&bgst[2], 1, 1);
+}
+
+/*
+ * Produce the current pixel of bitmap background b (display modes 3-5)
+ * and advance its reference point by one column.
+ * b->sx and b->sy hold the bitmap size shifted left by 8; b->depth
+ * selects 16-bit direct colour instead of 8-bit palette indices, where
+ * index 0 is transparent.
+ */
+void
+bitbg(bg *b)
+{
+	u16int cnt;
+	int v, w;
+	uchar *p;
+	u16int *rr;
+	uchar *base;
+	
+	cnt = reg[DISPCNT];
+	rr = reg - 8 + (b->n << 3);
+	if((bgmask & 1<<b->n) == 0)
+		goto next;
+	/* strictly inside the bitmap: with <= one extra column and row slipped through */
+	if(b->rpx >= 0 && b->rpy >= 0 && b->rpx < b->sx && b->rpy < b->sy){
+		base = vram;
+		/* the second frame buffer does not exist in mode 3 */
+		if((cnt & FRAME) != 0 && (cnt & 7) != 3)
+			base += 0xa000;
+		/* row length in pixels: 240, or 160 in mode 5 (was hard-coded as 240) */
+		w = b->sx >> 8;
+		if(b->depth){
+			p = base + 2 * ((b->rpx >> 8) + w * (b->rpy >> 8));
+			v = p[0] | p[1] << 8;
+		}else{
+			v = base[(b->rpx >> 8) + w * (b->rpy >> 8)];
+			if(v != 0)
+				v = pram[v];
+			else
+				v = -1;
+		}
+	}else
+		v = -1;
+	if(v >= 0)
+		pixel(v, b->n, reg[BG0CNT + b->n] & 3);
+next:
+	b->rpx += (s16int) rr[BG2PA];
+	b->rpy += (s16int) rr[BG2PC];
+}
+
+/*
+ * Produce the current pixel of the rotation/scaling tile background b
+ * and advance its reference point by one column.  The map holds one
+ * byte per tile and the characters are 8x8 with one byte per pixel;
+ * pixel value 0 is transparent.
+ */
+void
+rotbg(bg *b)
+{
+	u16int *rr, ta;
+	u16int bgcnt;
+	int row, sz, x, y;
+	uchar *p, v;
+
+	/* rr offsets the BG2 register names to background b->n */
+	rr = reg - 8 + (b->n << 3);
+	if((bgmask & 1<<b->n) == 0)
+		goto next;
+	bgcnt = reg[BG0CNT + b->n];
+	/* row = log2 of the tiles per map row, sz = map size in pixels */
+	row = (bgcnt >> 14) + 4;
+	sz = 1 << 3 + row;
+	x = b->rpx >> 8;
+	y = b->rpy >> 8;
+	if((bgcnt & DISPWRAP) != 0){
+		x &= sz - 1;
+		y &= sz - 1;
+	}else if((uint)x >= sz || (uint)y >= sz)
+		goto next;
+	ta = (bgcnt << 3 & 0xf800) + ((y >> 3) << row) + (x >> 3);
+	p = vram + (bgcnt << 12 & 0xc000) + (vram[ta] << 6);
+	p += (x & 7) + ((y & 7) << 3);
+	if((v = *p) != 0)
+		pixel(pram[v], b->n, bgcnt & 3);
+next:
+	b->rpx += (s16int) rr[BG2PA];
+	b->rpy += (s16int) rr[BG2PC];
+}
+
+/*
+ * Produce the current pixel of text background b from the tile row
+ * cached by tile(), then step one pixel to the right, fetching the
+ * next tile after the eighth pixel.  Pixel value 0 is transparent.
+ */
+void
+txtbg(bg *b)
+{
+	u16int bgcnt;
+	u8int x, v;
+
+	bgcnt = reg[BG0CNT + b->n];
+	if((bgmask & 1<<b->n) == 0)
+		goto next;
+	x = b->tnx;
+	/* bit 10 of the map entry flips the tile horizontally */
+	if((b->t & 1<<10) != 0)
+		x ^= 7;
+	if((bgcnt & BG8) != 0)
+		v = b->chr[x];
+	else{
+		/* two pixels per byte: odd pixels are in the high nibble */
+		v = b->chr[x>>1];
+		if((x & 1) != 0)
+			v >>= 4;
+		else
+			v &= 0xf;
+	}
+	if(v != 0)
+		pixel(b->pal[v], b->n, bgcnt & 3);
+next:
+	if(++b->tnx == 8){
+		b->tnx = 0;
+		b->tx++;
+		tile(b);
+	}
+}
+
+/*
+ * Render the current pixel of every background that exists in the
+ * display mode latched by bgsinit().
+ */
+void
+bgs(void)
+{
+	int i;
+
+	if(mode == 0){
+		for(i = 0; i < 4; i++)
+			txtbg(&bgst[i]);
+	}else if(mode == 1){
+		txtbg(&bgst[0]);
+		txtbg(&bgst[1]);
+		rotbg(&bgst[2]);
+	}else if(mode == 2){
+		rotbg(&bgst[2]);
+		rotbg(&bgst[3]);
+	}else if(mode <= 5)
+		bitbg(&bgst[2]);
+}
+
+/*
+ * Build the list of sprites that intersect line ppuy.
+ * OAM is scanned in order, 4 halfwords per entry; every sprite that
+ * covers the line gets a slot in sprt[] holding what spr() needs to
+ * fetch its pixels.  Each sprite is charged against a per-line budget
+ * and the scan stops once the budget is exhausted.
+ */
+void
+sprinit(void)
+{
+	u16int *p, *pp;
+	u16int cnt, t1;
+	u32int t0;
+	int budg;
+	uchar ws, h, hb, d, dy, s;
+	/* log2 of width and height, indexed by size | shape << 2 */
+	static uchar wss[16] = {3, 4, 5, 6, 4, 5, 5, 6, 3, 3, 4, 5};
+	static uchar hss[16] = {3, 4, 5, 6, 3, 3, 4, 5, 4, 5, 5, 6};
+
+	sp = sprt;
+	cnt = reg[DISPCNT];
+	budg = (cnt & HBLFREE) != 0 ? 954 : 1210;
+	for(p = oam; p < oam + 512; p += 4){
+		t0 = p[0];
+		/* bit 9 without bit 8 means the sprite is switched off */
+		if((t0 & (SPRROT|SPRDIS)) == SPRDIS)
+			continue;
+		t0 |= p[1] << 16;
+		s = t0 >> 30 & 3 | t0 >> 12 & 12;
+		/* h is the sprite height, hb the height of the area it occupies on screen */
+		hb = h = 1 << hss[s];
+		dy = ppuy - (u8int) t0;
+		if((t0 & (SPRROT|SPRDOUB)) == (SPRROT|SPRDOUB))
+			hb <<= 1;
+		if(dy >= hb || (u8int)t0 + hb > 256 && ppuy + 256 - (u8int)t0 >= hb)
+			continue;
+		/* sign-extend the 9-bit x coordinate */
+		sp->x = (s32int)(t0 << 7) >> 23;
+		sp->t0 = t0;
+		ws = wss[s];
+		sp->wb = sp->w = 1<<ws;
+		sp->h = h;
+		sp->t1 = t1 = p[2];
+		sp->base = vram + 0x10000 + ((t1 & 0x3ff) << 5);
+		d = (t0 & SPR8) != 0;
+		/* shift that turns a tile row number into a byte offset */
+		sp->ysh = (cnt & OBJNOMAT) != 0 ? 2 + d + ws : 10;
+		if((t0 & SPRROT) != 0){
+			if((t0 & SPRDOUB) != 0)
+				sp->wb <<= 1;
+			budg -= 10 + sp->w*2;
+			/* the four matrix parameters sit in the fourth halfword of consecutive entries */
+			pp = oam + 3 + (t0 >> 21 & 0x1f0);
+			sp->dx = pp[0];
+			sp->dy = pp[8];
+			/* texture coordinate of the leftmost pixel of this line, relative to the centre */
+			sp->rx = (dy - hb/2) * (s16int) pp[4] + (sp->w << 7) - sp->dx * sp->wb/2;
+			sp->ry = (dy - hb/2) * (s16int)pp[12] + (sp->h << 7) - sp->dy * sp->wb/2;
+			/* partly off the left edge: skip the columns that are never drawn */
+			if(sp->x < 0){
+				sp->rx -= sp->x * sp->dx;
+				sp->ry -= sp->x * sp->dy;
+			}
+		}else{
+			budg -= sp->w;
+			if((t0 & SPRVFLIP) != 0)
+				dy = h - 1 - dy;
+			sp->base += (dy & 7) << 2 + d;
+			sp->base += dy >> 3 << sp->ysh;
+			if((t0 & SPRHFLIP) != 0)
+				sp->base += sp->w - 7 << 2 + d;
+			/* added to base after every 8 pixels to reach the same row of the next tile */
+			sp->inc = (1 << 5 + d) - (1 << 2 + d);
+			/* partly off the left edge: skip the columns that are never drawn */
+			if(sp->x < 0)
+				if((t0 & SPRHFLIP) != 0){
+					sp->base -= ((-sp->x & 7) >> 1 - d) + (-sp->x >> 3 << 5 + d);
+					if((t0 & SPR8) == 0 && (sp->x & 1) != 0)
+						sp->base--;
+				}else
+					sp->base += ((-sp->x & 7) >> 1 - d) + (-sp->x >> 3 << 5 + d);
+		}
+		if((t0 & SPR8) != 0)
+			sp->pal = pram + 0x100;
+		else
+			sp->pal = pram + 0x100 + (t1 >> 8 & 0xf0);
+		if(budg < 0)
+			break;
+		sp++;
+	}
+}
+
+/*
+ * Render the sprite layer for the current pixel.
+ * Every sprite collected by sprinit() that covers column ppux yields
+ * one pixel value; of the opaque ones the one with the lowest priority
+ * value wins (the earliest on a tie) and is offered to pixel() as
+ * layer 4.  Sprites in mode 2 only set objwin; a winning pixel from a
+ * sprite in mode 1 sets objtrans.
+ */
+void
+spr(void)
+{
+	sprite *s;
+	ushort dx;
+	u32int t0;
+	uchar v;
+	ushort x, y;
+	/* must be wider than 16 bits: bit 16 carries the mode 1 flag to pv */
+	int c;
+	int pv, ppri, pri;
+	uchar d;
+	uchar *b;
+	
+	pv = -1;
+	ppri = 6;
+	for(s = sprt; s < sp; s++){
+		/* unsigned, so columns left of the sprite compare as too large */
+		dx = ppux - s->x;
+		if(dx >= s->wb)
+			continue;
+		t0 = s->t0;
+		if((t0 & SPRROT) != 0){
+			x = s->rx >> 8;
+			y = s->ry >> 8;
+			if(x < s->w && y < s->h){
+				b = s->base;
+				d = (t0 & SPR8) != 0;
+				b += (y & 7) << 2 + d;
+				b += y >> 3 << s->ysh;
+				b += (x & 7) >> 1 - d;
+				b += x >> 3 << 5 + d;
+				v = *b;
+				if(!d)
+					if((x & 1) != 0)
+						v >>= 4;
+					else
+						v &= 0xf;
+			}else
+				v = 0;
+			s->rx += s->dx;
+			s->ry += s->dy;
+		}else if((t0 & SPRHFLIP) != 0){
+			/* flipped: walk the row backwards */
+			if((t0 & SPR8) != 0)
+				v = *--s->base;
+			else if((dx & 1) != 0)
+				v = *s->base & 0x0f;
+			else
+				v = *--s->base >> 4;
+			if((dx & 7) == 7)
+				s->base -= s->inc;
+		}else{
+			v = *s->base;
+			if((t0 & SPR8) != 0)
+				s->base++;
+			else if((dx & 1) != 0){
+				v >>= 4;
+				s->base++;
+			}else
+				v &= 0xf;
+			if((dx & 7) == 7)
+				s->base += s->inc;
+		}
+		if(v != 0){
+			pri = s->t1 >> 10 & 3;
+			c = s->pal[v];
+			switch(s->t0 >> 10 & 3){
+			case 1:
+				/* c used to be a u16int, which silently dropped this flag */
+				c |= 1<<16;
+				/* fall through */
+			case 0:
+				if(ppri > pri){
+					pv = c;
+					ppri = pri;
+				}
+				break;
+			case 2:
+				objwin = 1;
+				break;
+			}
+		}
+	}
+	if(pv >= 0){
+		pixel(pv, 4, ppri);
+		if(pv >> 16 != 0)
+			objtrans = 1;
+	}
+}
+
+/*
+ * Blend two 15-bit colours: every 5-bit channel becomes
+ * (c1 * blda + c2 * bldb) / 16, saturated at the channel maximum.
+ */
+u16int
+mix(u16int c1, u16int c2)
+{
+	static u16int mask[3] = {0x7c00, 0x03e0, 0x001f};
+	u16int ch, out;
+	int i;
+
+	out = 0;
+	for(i = 0; i < 3; i++){
+		/* the channel is processed in place, so no shifting is needed */
+		ch = ((c1 & mask[i]) * blda + (c2 & mask[i]) * bldb) >> 4;
+		if(ch > mask[i])
+			ch = mask[i];
+		out |= ch & mask[i];
+	}
+	return out;
+}
+
+/*
+ * Fade a 15-bit colour towards white: every channel moves bldy/16 of
+ * the way from its value to the channel maximum.
+ */
+u16int
+brighten(u16int c1)
+{
+	static u16int mask[3] = {0x7c00, 0x03e0, 0x001f};
+	u16int ch, out;
+	int i;
+
+	out = 0;
+	for(i = 0; i < 3; i++){
+		ch = c1 & mask[i];
+		ch = ch + (mask[i] - ch) * bldy / 16;
+		if(ch > mask[i])
+			ch = mask[i];
+		out |= ch & mask[i];
+	}
+	return out;
+}
+
+/*
+ * Fade a 15-bit colour towards black: every channel is scaled by
+ * (16 - bldy) / 16.
+ */
+u16int
+darken(u16int c1)
+{
+	u16int keep, blue, green, red;
+
+	keep = 16 - bldy;
+	blue = (c1 & 0x7c00) * keep / 16;
+	green = (c1 & 0x03e0) * keep / 16;
+	red = (c1 & 0x001f) * keep / 16;
+	return (blue & 0x7c00) | (green & 0x03e0) | red;
+}
+
+/*
+ * Compute bgmask, the set of layers enabled for the current pixel:
+ * bits 0-3 are the backgrounds, bit 4 the sprites and bit 5 colour
+ * effects.  It starts from the enable bits in DISPCNT (effects always
+ * on) and, if any window is enabled, is restricted by the control bits
+ * of the first region that applies: window 0, window 1, the sprite
+ * window, or the outside.  A sprite pixel already drawn is removed
+ * again if sprites turn out to be masked here.
+ */
+void
+windows(void)
+{
+	u16int dispcnt;
+	u16int v, h;
+
+	dispcnt = reg[DISPCNT];
+	bgmask = dispcnt >> 8 | 1<<5;
+	if((dispcnt >> 13) != 0){
+		if((dispcnt & 1<<13) != 0){
+			/* high byte: first line/column inside; low byte: first one outside */
+			v = reg[WIN0V];
+			h = reg[WIN0H];
+			if(ppuy < (u8int)v && ppuy >= v >> 8 &&
+				ppux < (u8int)h && ppux >= h >> 8){
+				bgmask &= reg[WININ];
+				goto windone;
+			}
+		}
+		if((dispcnt & 1<<14) != 0){
+			v = reg[WIN1V];
+			h = reg[WIN1H];
+			if(ppuy < (u8int)v && ppuy >= v >> 8 &&
+				ppux < (u8int)h && ppux >= h >> 8){
+				bgmask &= reg[WININ] >> 8;
+				goto windone;
+			}
+		}
+		/* objwin was set by spr() if a window sprite covers this pixel */
+		if((dispcnt & 1<<15) != 0 && objwin != 0){
+			bgmask &= reg[WINOUT] >> 8;
+			goto windone;
+		}
+		bgmask &= reg[WINOUT];
+	}
+windone:
+	/* spr() ran before this: undo its pixel by restoring the backdrop */
+	if(pixelpri[0] != 8 && (bgmask & 1<<4) == 0){
+		pixelpri[0] = 8;
+		pixeldat[0] = pram[0] | 5 << 16;
+	}
+}
+
+/*
+ * Apply the colour effect selected in BLDCNT to the frontmost pixel.
+ * Nothing happens if effects are masked for this pixel (bit 5 of
+ * bgmask), or if no effect is selected or the frontmost layer is not
+ * enabled in the low bits of BLDCNT.  A frontmost sprite pixel while
+ * objtrans is set goes straight to alpha blending.  Alpha blending
+ * additionally requires the layer of the second pixel to be enabled
+ * in bits 8 and up.
+ */
+void
+colormath(void)
+{
+	u8int src0;
+	u16int bldcnt;
+	
+	if((bgmask & 1<<5) == 0)
+		return;
+	bldcnt = reg[BLDCNT];
+	/* layer number of the frontmost pixel, as stored by pixel() */
+	src0 = pixeldat[0] >> 16;
+	if(objtrans && src0 == 4)
+		goto alpha;
+	if((bldcnt & 3<<6) == 0 || (bldcnt & 1<<src0) == 0)
+		return;
+	switch(bldcnt >> 6 & 3){
+	case 1:
+	alpha:
+		if((bldcnt & 1<<8+(pixeldat[1]>>16)) == 0)
+			return;
+		pixeldat[0] = mix(pixeldat[0], pixeldat[1]);
+		break;
+	case 2:
+		pixeldat[0] = brighten(pixeldat[0]);
+		break;
+	case 3:
+		pixeldat[0] = darken(pixeldat[0]);
+		break;
+	}
+}
+
+/*
+ * Advance the video unit by one pixel position.
+ * Inside the visible 240x160 area one pixel is composed and stored.
+ * At column 240 of a visible line the horizontal blank interrupt and
+ * DMA are triggered.  A line is 308 positions long and a frame 228
+ * lines; at the end of a line the line-compare interrupt is checked
+ * and either the next visible line is prepared or, on line 160, the
+ * vertical blank interrupt and DMA are triggered.  flush() is called
+ * when the frame wraps around.
+ */
+void
+ppustep(void)
+{
+	u16int stat;
+	u16int cnt;
+	
+	stat = reg[DISPSTAT];
+	cnt = reg[DISPCNT];
+	if(ppuy < 160 && ppux < 240)
+		if((cnt & FBLANK) == 0){
+			objwin = 0;
+			objtrans = 0;
+			/* start from the backdrop: layer 5 at priority 8, behind everything */
+			pixelpri[0] = 8;
+			pixeldat[0] = pram[0] | 5 << 16;
+			/* sprites go first because windows() needs objwin */
+			if((cnt & 1<<12) != 0)
+				spr();
+			windows();
+			bgs();
+			colormath();
+			pixeldraw(ppux, ppuy, pixeldat[0]);
+		}else
+			pixeldraw(ppux, ppuy, 0xffff);
+	if(ppux == 240 && ppuy < 160){
+		if((stat & IRQHBLEN) != 0)
+			setif(IRQHBL);
+		dmastart(DMAHBL);
+	}
+	if(++ppux >= 308){
+		ppux = 0;
+		if(++ppuy >= 228){
+			ppuy = 0;
+			flush();
+		}
+		if((stat & IRQVCTREN) != 0 && ppuy == stat >> 8)
+			setif(IRQVCTR);
+		if(ppuy < 160){
+			bgsinit();
+			sprinit();
+		}else if(ppuy == 160){
+			if((stat & IRQVBLEN) != 0)
+				setif(IRQVBL);
+			dmastart(DMAVBL);
+		}
+	}
+}
--- /dev/null
+++ b/sys/src/games/gba/test.c
@@ -1,0 +1,15 @@
+#include <u.h>
+#include <libc.h>
+
+/*
+ * Remove every entry of the current directory whose name starts with
+ * the byte 0xef.
+ */
+void
+main(void)
+{
+	int fd;
+	Dir *d;
+	int n, i;
+
+	fd = open(".", OREAD);
+	if(fd < 0)
+		sysfatal("open: %r");
+	n = dirreadall(fd, &d);
+	close(fd);
+	if(n < 0)
+		sysfatal("dirreadall: %r");
+	for(i = 0; i < n; i++)
+		/* compare as an unsigned byte so the result does not depend on the signedness of char */
+		if((uchar)d[i].name[0] == 0xef)
+			remove(d[i].name);
+	free(d);
+	/* returning from main would leave a non-empty exit status */
+	exits(nil);
+}
\ No newline at end of file