ref: 61a062ee9ff7bbd489baba6dd7c6adb1978e246b
dir: /sys/src/9/kw/fpiarm.c/
/* * this doesn't attempt to implement ARM floating-point properties * that aren't visible in the Inferno environment. * all arithmetic is done in double precision. * the FP trap status isn't updated. */ #include "u.h" #include "../port/lib.h" #include "mem.h" #include "dat.h" #include "fns.h" #include "ureg.h" #include "arm.h" #include "fpi.h" /* undef this if correct kernel r13 isn't in Ureg; * check calculation in fpiarm below */ #define REG(ur, x) (*(long*)(((char*)(ur))+roff[(x)])) #define FR(ufp, x) (*(Internal*)(ufp)->regs[(x)&(Nfpctlregs - 1)]) typedef struct FP2 FP2; typedef struct FP1 FP1; struct FP2 { char* name; void (*f)(Internal, Internal, Internal*); }; struct FP1 { char* name; void (*f)(Internal*, Internal*); }; enum { N = 1<<31, Z = 1<<30, C = 1<<29, V = 1<<28, REGPC = 15, }; enum { fpemudebug = 0, }; #undef OFR #define OFR(X) ((ulong)&((Ureg*)0)->X) static int roff[] = { OFR(r0), OFR(r1), OFR(r2), OFR(r3), OFR(r4), OFR(r5), OFR(r6), OFR(r7), OFR(r8), OFR(r9), OFR(r10), OFR(r11), OFR(r12), OFR(r13), OFR(r14), OFR(pc), }; static Internal fpconst[8] = { /* indexed by op&7 (ARM 7500 FPA) */ /* s, e, l, h */ {0, 0x1, 0x00000000, 0x00000000}, /* 0.0 */ {0, 0x3FF, 0x00000000, 0x08000000}, /* 1.0 */ {0, 0x400, 0x00000000, 0x08000000}, /* 2.0 */ {0, 0x400, 0x00000000, 0x0C000000}, /* 3.0 */ {0, 0x401, 0x00000000, 0x08000000}, /* 4.0 */ {0, 0x401, 0x00000000, 0x0A000000}, /* 5.0 */ {0, 0x3FE, 0x00000000, 0x08000000}, /* 0.5 */ {0, 0x402, 0x00000000, 0x0A000000}, /* 10.0 */ }; /* * arm binary operations */ static void fadd(Internal m, Internal n, Internal *d) { (m.s == n.s? fpiadd: fpisub)(&m, &n, d); } static void fsub(Internal m, Internal n, Internal *d) { m.s ^= 1; (m.s == n.s? fpiadd: fpisub)(&m, &n, d); } static void fsubr(Internal m, Internal n, Internal *d) { n.s ^= 1; (n.s == m.s? fpiadd: fpisub)(&n, &m, d); } static void fmul(Internal m, Internal n, Internal *d) { fpimul(&m, &n, d); } static void fdiv(Internal m, Internal n, Internal *d) { fpidiv(&m, &n, d); } static void fdivr(Internal m, Internal n, Internal *d) { fpidiv(&n, &m, d); } /* * arm unary operations */ static void fmov(Internal *m, Internal *d) { *d = *m; } static void fmovn(Internal *m, Internal *d) { *d = *m; d->s ^= 1; } static void fabsf(Internal *m, Internal *d) { *d = *m; d->s = 0; } static void frnd(Internal *m, Internal *d) { short e; (m->s? fsub: fadd)(fpconst[6], *m, d); if(IsWeird(d)) return; fpiround(d); e = (d->e - ExpBias) + 1; if(e <= 0) SetZero(d); else if(e > FractBits){ if(e < 2*FractBits) d->l &= ~((1<<(2*FractBits - e))-1); }else{ d->l = 0; if(e < FractBits) d->h &= ~((1<<(FractBits-e))-1); } } /* * ARM 7500 FPA opcodes */ static FP1 optab1[16] = { /* Fd := OP Fm */ [0] {"MOVF", fmov}, [1] {"NEGF", fmovn}, [2] {"ABSF", fabsf}, [3] {"RNDF", frnd}, [4] {"SQTF", /*fsqt*/0}, /* LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN all `deprecated' */ /* URD and NRM aren't implemented */ }; static FP2 optab2[16] = { /* Fd := Fn OP Fm */ [0] {"ADDF", fadd}, [1] {"MULF", fmul}, [2] {"SUBF", fsub}, [3] {"RSUBF", fsubr}, [4] {"DIVF", fdiv}, [5] {"RDIVF", fdivr}, /* POW, RPW deprecated */ [8] {"REMF", /*frem*/0}, [9] {"FMF", fmul}, /* fast multiply */ [10] {"FDV", fdiv}, /* fast divide */ [11] {"FRD", fdivr}, /* fast reverse divide */ /* POL deprecated */ }; /* * ARM VFP opcodes */ static FP1 voptab1[32] = { /* Vd := OP Vm */ [0] {"MOVF", fmov}, [1] {"ABSF", fabsf}, [2] {"NEGF", fmovn}, [15] {"CVTF", fmov}, }; static FP2 voptab2[16] = { /* Vd := Vn FOP Fm */ [4] {"MULF", fmul}, [6] {"ADDF", fadd}, [7] {"SUBF", fsub}, [8] {"DIVF", fdiv}, }; static ulong fcmp(Internal *n, Internal *m) { int i; Internal rm, rn; if(IsWeird(m) || IsWeird(n)){ /* BUG: should trap if not masked */ return V|C; } rn = *n; rm = *m; fpiround(&rn); fpiround(&rm); i = fpicmp(&rn, &rm); if(i > 0) return C; else if(i == 0) return C|Z; else return N; } static void fld(void (*f)(Internal*, void*), int d, ulong ea, int n, FPsave *ufp) { void *mem; mem = (void*)ea; (*f)(&FR(ufp, d), mem); if(fpemudebug) print("MOV%c #%lux, F%d\n", n==8? 'D': 'F', ea, d); } static void fst(void (*f)(void*, Internal*), ulong ea, int s, int n, FPsave *ufp) { Internal tmp; void *mem; mem = (void*)ea; tmp = FR(ufp, s); if(fpemudebug) print("MOV%c F%d,#%lux\n", n==8? 'D': 'F', s, ea); (*f)(mem, &tmp); } static int condok(int cc, int c) { switch(c){ case 0: /* Z set */ return cc&Z; case 1: /* Z clear */ return (cc&Z) == 0; case 2: /* C set */ return cc&C; case 3: /* C clear */ return (cc&C) == 0; case 4: /* N set */ return cc&N; case 5: /* N clear */ return (cc&N) == 0; case 6: /* V set */ return cc&V; case 7: /* V clear */ return (cc&V) == 0; case 8: /* C set and Z clear */ return cc&C && (cc&Z) == 0; case 9: /* C clear or Z set */ return (cc&C) == 0 || cc&Z; case 10: /* N set and V set, or N clear and V clear */ return (~cc&(N|V))==0 || (cc&(N|V)) == 0; case 11: /* N set and V clear, or N clear and V set */ return (cc&(N|V))==N || (cc&(N|V))==V; case 12: /* Z clear, and either N set and V set or N clear and V clear */ return (cc&Z) == 0 && ((~cc&(N|V))==0 || (cc&(N|V))==0); case 13: /* Z set, or N set and V clear or N clear and V set */ return (cc&Z) || (cc&(N|V))==N || (cc&(N|V))==V; case 14: /* always */ return 1; case 15: /* never (reserved) */ return 0; } return 0; /* not reached */ } static void unimp(ulong pc, ulong op) { char buf[60]; snprint(buf, sizeof(buf), "sys: fp: pc=%lux unimp fp 0x%.8lux", pc, op); if(fpemudebug) print("FPE: %s\n", buf); error(buf); /* no return */ } static void fpaemu(ulong pc, ulong op, Ureg *ur, FPsave *ufp) { int rn, rd, tag, o; long off; ulong ea; Internal tmp, *fm, *fn; /* note: would update fault status here if we noted numeric exceptions */ /* * LDF, STF; 10.1.1 */ if(((op>>25)&7) == 6){ if(op & (1<<22)) unimp(pc, op); /* packed or extended */ rn = (op>>16)&0xF; off = (op&0xFF)<<2; if((op & (1<<23)) == 0) off = -off; ea = REG(ur, rn); if(rn == REGPC) ea += 8; if(op & (1<<24)) ea += off; rd = (op>>12)&7; if(op & (1<<20)){ if(op & (1<<15)) fld(fpid2i, rd, ea, 8, ufp); else fld(fpis2i, rd, ea, 4, ufp); }else{ if(op & (1<<15)) fst(fpii2d, ea, rd, 8, ufp); else fst(fpii2s, ea, rd, 4, ufp); } if((op & (1<<24)) == 0) ea += off; if(op & (1<<21)) REG(ur, rn) = ea; return; } /* * CPRT/transfer, 10.3 */ if(op & (1<<4)){ rd = (op>>12) & 0xF; /* * compare, 10.3.1 */ if(rd == 15 && op & (1<<20)){ rn = (op>>16)&7; fn = &FR(ufp, rn); if(op & (1<<3)){ fm = &fpconst[op&7]; if(fpemudebug) tag = 'C'; }else{ fm = &FR(ufp, op&7); if(fpemudebug) tag = 'F'; } switch((op>>21)&7){ default: unimp(pc, op); case 4: /* CMF: Fn :: Fm */ case 6: /* CMFE: Fn :: Fm (with exception) */ ur->psr &= ~(N|C|Z|V); ur->psr |= fcmp(fn, fm); break; case 5: /* CNF: Fn :: -Fm */ case 7: /* CNFE: Fn :: -Fm (with exception) */ tmp = *fm; tmp.s ^= 1; ur->psr &= ~(N|C|Z|V); ur->psr |= fcmp(fn, &tmp); break; } if(fpemudebug) print("CMPF %c%d,F%ld =%#lux\n", tag, rn, op&7, ur->psr>>28); return; } /* * other transfer, 10.3 */ switch((op>>20)&0xF){ default: unimp(pc, op); case 0: /* FLT */ rn = (op>>16) & 7; fpiw2i(&FR(ufp, rn), ®(ur, rd)); if(fpemudebug) print("MOVW[FD] R%d, F%d\n", rd, rn); break; case 1: /* FIX */ if(op & (1<<3)) unimp(pc, op); rn = op & 7; tmp = FR(ufp, rn); fpii2w(®(ur, rd), &tmp); if(fpemudebug) print("MOV[FD]W F%d, R%d =%ld\n", rn, rd, REG(ur, rd)); break; case 2: /* FPSR := Rd */ ufp->status = REG(ur, rd); if(fpemudebug) print("MOVW R%d, FPSR\n", rd); break; case 3: /* Rd := FPSR */ REG(ur, rd) = ufp->status; if(fpemudebug) print("MOVW FPSR, R%d\n", rd); break; case 4: /* FPCR := Rd */ ufp->control = REG(ur, rd); if(fpemudebug) print("MOVW R%d, FPCR\n", rd); break; case 5: /* Rd := FPCR */ REG(ur, rd) = ufp->control; if(fpemudebug) print("MOVW FPCR, R%d\n", rd); break; } return; } /* * arithmetic */ if(op & (1<<3)){ /* constant */ fm = &fpconst[op&7]; if(fpemudebug) tag = 'C'; }else{ fm = &FR(ufp, op&7); if(fpemudebug) tag = 'F'; } rd = (op>>12)&7; o = (op>>20)&0xF; if(op & (1<<15)){ /* monadic */ FP1 *fp; fp = &optab1[o]; if(fp->f == nil) unimp(pc, op); if(fpemudebug) print("%s %c%ld,F%d\n", fp->name, tag, op&7, rd); (*fp->f)(fm, &FR(ufp, rd)); } else { FP2 *fp; fp = &optab2[o]; if(fp->f == nil) unimp(pc, op); rn = (op>>16)&7; if(fpemudebug) print("%s %c%ld,F%d,F%d\n", fp->name, tag, op&7, rn, rd); (*fp->f)(*fm, FR(ufp, rn), &FR(ufp, rd)); } } static void vfpoptoi(Internal *fm, uchar o2, uchar o4) { fm->s = o2>>3; fm->e = ((o2>>3) | ~(o2 & 4)) - 3 + ExpBias; fm->l = 0; fm->h = o4 << (20+NGuardBits); if(fm->e) fm->h |= HiddenBit; else fm->e++; } static void vfpemu(ulong pc, ulong op, Ureg *ur, FPsave *ufp) { int sz, vd, o1, o2, o3, o4, o, tag; long off; ulong ea; Word w; Internal *fm, fc; /* note: would update fault status here if we noted numeric exceptions */ sz = op & (1<<8); o1 = (op>>20) & 0xF; o2 = (op>>16) & 0xF; vd = (op>>12) & 0xF; switch((op>>24) & 0xF){ default: unimp(pc, op); case 0xD: /* * Extension Register load/store A7.6 */ off = (op&0xFF)<<2; if((op & (1<<23)) == 0) off = -off; ea = REG(ur, o2) + off; switch(o1&0x7){ /* D(Bit 22) = 0 (5l) */ default: unimp(pc, op); case 0: if(sz) fst(fpii2d, ea, vd, sz, ufp); else fst(fpii2s, ea, vd, sz, ufp); break; case 1: if(sz) fld(fpid2i, vd, ea, sz, ufp); else fld(fpis2i, vd, ea, sz, ufp); break; } break; case 0xE: if(op & (1<<4)){ /* * Register transfer between Core & Extension A7.8 */ if(sz) /* C(Bit 8) != 0 */ unimp(pc, op); switch(o1){ default: unimp(pc, op); case 0: /* Fn := Rt */ *((Word*)&FR(ufp, o2)) = REG(ur, vd); if(fpemudebug) print("MOVWF R%d, F%d\n", vd, o2); break; case 1: /* Rt := Fn */ REG(ur, vd) = *((Word*)&FR(ufp, o2)); if(fpemudebug) print("MOVFW F%d, R%d =%ld\n", o2, vd, REG(ur, vd)); break; case 0xE: /* FPSCR := Rt */ ufp->status = REG(ur, vd); if(fpemudebug) print("MOVW R%d, FPSCR\n", vd); break; case 0xF: /* Rt := FPSCR */ if(vd == 0xF){ ur->psr = ufp->status; if(fpemudebug) print("MOVW FPSCR, PSR\n"); }else{ REG(ur, vd) = ufp->status; if(fpemudebug) print("MOVW FPSCR, R%d\n", vd); } break; } } else{ /* * VFP data processing instructions A7.5 * Note: As per 5l we ignore (D, N, M) bits */ if(fpemudebug) tag = 'F'; o3 = (op>>6) & 0x3; o4 = op & 0xF; fm = &FR(ufp, o4); if(o1 == 0xB){ /* A7-17 */ if(o3 & 0x1){ switch(o2){ default: o = (o2<<1) | (o3>>1); break; case 0x8: /* CVT int -> float/double */ w = *((Word*)fm); fpiw2i(&FR(ufp, vd), &w); if(fpemudebug) print("CVTW%c F%d, F%d\n", sz?'D':'F', o4, vd); return; case 0xD: /* CVT float/double -> int */ fpii2w(&w, fm); *((Word*)&FR(ufp, vd)) = w; if(fpemudebug) print("CVT%cW F%d, F%d\n", sz?'D':'F', o4, vd); return; case 0x5: /* CMPF(E) */ fm = &fpconst[0]; if(fpemudebug) tag = 'C'; case 0x4: /* CMPF(E) */ ufp->status &= ~(N|C|Z|V); ufp->status |= fcmp(&FR(ufp, vd), fm); if(fpemudebug) print("CMPF %c%d,F%d =%#lux\n", tag, (o2&0x1)? 0: o4, vd, ufp->status>>28); return; } }else{ /* VMOV imm (VFPv3 & v4) (5l doesn't generate) */ vfpoptoi(&fc, o2, o4); fm = &fc; o = 0; if(fpemudebug) tag = 'C'; } FP1 *vfp; vfp = &voptab1[o]; if(vfp->f == nil) unimp(pc, op); if(fpemudebug) print("%s %c%d,F%d\n", vfp->name, tag, o4, vd); (*vfp->f)(fm, &FR(ufp, vd)); } else { /* A7-16 */ FP2 *vfp; o = ((o1&0x3)<<1) | (o1&0x8) | (o3&0x1); vfp = &voptab2[o]; if(vfp->f == nil) unimp(pc, op); if(fpemudebug) print("%s F%d,F%d,F%d\n", vfp->name, o4, o2, vd); (*vfp->f)(*fm, FR(ufp, o2), &FR(ufp, vd)); } } break; } } void casemu(ulong pc, ulong op, Ureg *ur) { ulong *rp, ro, rn, *rd; USED(pc); rp = (ulong*)ur; ro = rp[op>>16 & 0x7]; rn = rp[op>>0 & 0x7]; rd = rp + (op>>12 & 0x7); rp = (ulong*)*rd; validaddr((uintptr)rp, 4, 1); splhi(); if(*rd = (*rp == ro)) *rp = rn; spllo(); } int ldrexvalid; void ldrex(ulong pc, ulong op, Ureg *ur) { ulong *rp, *rd, *addr; USED(pc); rp = (ulong*)ur; rd = rp + (op>>16 & 0x7); addr = (ulong*)*rd; validaddr((uintptr)addr, 4, 0); ldrexvalid = 1; rp[op>>12 & 0x7] = *addr; if(fpemudebug) print("ldrex, r%ld = [r%ld]@0x%8.8p = 0x%8.8lux", op>>12 & 0x7, op>>16 & 0x7, addr, rp[op>>12 & 0x7]); } void strex(ulong pc, ulong op, Ureg *ur) { ulong *rp, rn, *rd, *addr; USED(pc); rp = (ulong*)ur; rd = rp + (op>>16 & 0x7); rn = rp[op>>0 & 0x7]; addr = (ulong*)*rd; validaddr((uintptr)addr, 4, 1); splhi(); if(ldrexvalid){ if(fpemudebug) print("strex valid, [r%ld]@0x%8.8p = r%ld = 0x%8.8lux", op>>16 & 0x7, addr, op>>0 & 0x7, rn); *addr = rn; ldrexvalid = 0; rp[op>>12 & 0x7] = 0; }else{ if(fpemudebug) print("strex invalid, r%ld = 1", op>>16 & 0x7); rp[op>>12 & 0x7] = 1; } spllo(); } struct { ulong opc; ulong mask; void (*f)(ulong, ulong, Ureg*); } specialopc[] = { { 0x01900f9f, 0x0ff00fff, ldrex }, { 0x01800f90, 0x0ff00ff0, strex }, { 0x0ed00100, 0x0ef08100, casemu }, { 0x00000000, 0x00000000, nil } }; /* * returns the number of FP instructions emulated */ int fpiarm(Ureg *ur) { ulong op, o, cp; FPsave *ufp; int i, n; if(up == nil) panic("fpiarm not in a process"); ufp = up->fpsave; /* * because all the emulated fp state is in the proc structure, * it need not be saved/restored */ if(up->fpstate != FPactive){ // assert(sizeof(Internal) == sizeof(ufp->regs[0])); up->fpstate = FPactive; ufp->control = 0; ufp->status = (0x01<<28)|(1<<12); /* software emulation, alternative C flag */ for(n = 0; n < Nfpctlregs; n++) FR(ufp, n) = fpconst[0]; } for(n=0; ;n++){ validaddr(ur->pc, 4, 0); op = *(ulong*)(ur->pc); if(fpemudebug) print("%#lux: %#8.8lux ", ur->pc, op); o = (op>>24) & 0xF; cp = (op>>8) & 0xF; for(i = 0; specialopc[i].f; i++) if((op & specialopc[i].mask) == specialopc[i].opc) break; if(specialopc[i].f){ if(condok(ur->psr, op>>28)) specialopc[i].f(ur->pc, op, ur); } else if(ISVFPOP(cp, o)){ if(condok(ur->psr, op>>28)) vfpemu(ur->pc, op, ur, ufp); } else if(ISFPAOP(cp, o)){ if(condok(ur->psr, op>>28)) fpaemu(ur->pc, op, ur, ufp); } else break; ur->pc += 4; /* pretend cpu executed the instr */ } if(fpemudebug) print("\n"); return n; }