ref: 4c21da2ededfb5fc628dbee0d75b0aa5263ef97b
parent: ca5b491753ad363da8d9deb5978b56231ec27bf5
parent: eb168924840977fe0941d4fc90e56db0d484a6e4
author: cinap_lenrek <[email protected]>
date: Thu Apr 28 16:53:53 EDT 2016
merge
--- a/sys/include/regexp.h
+++ b/sys/include/regexp.h
@@ -1,15 +1,29 @@
-#pragma src "/sys/src/libregexp"
-#pragma lib "libregexp.a"
+#pragma src "/sys/src/libregexp"
+#pragma lib "libregexp.a"
+enum
+{
+ OANY = 0,
+ OBOL,
+ OCLASS,
+ OEOL,
+ OJMP,
+ ONOTNL,
+ ORUNE,
+ OSAVE,
+ OSPLIT,
+ OUNSAVE,
+};
-typedef struct Resub Resub;
-typedef struct Reclass Reclass;
-typedef struct Reinst Reinst;
-typedef struct Reprog Reprog;
+typedef struct Resub Resub;
+typedef struct Reinst Reinst;
+typedef struct Reprog Reprog;
+typedef struct Rethread Rethread;
-/*
- * Sub expression matches
- */
-struct Resub{
+#pragma incomplete Reinst
+#pragma incomplete Rethread
+
+struct Resub
+{
union
{
char *sp;
@@ -21,46 +35,21 @@
Rune *rep;
};
};
-
-/*
- * character class, each pair of rune's defines a range
- */
-struct Reclass{
- Rune *end;
- Rune spans[64];
+struct Reprog
+{
+ Reinst *startinst;
+ Rethread *threads;
+ char *regstr;
+ int len;
+ int nthr;
};
-/*
- * Machine instructions
- */
-struct Reinst{
- int type;
- union {
- Reclass *cp; /* class pointer */
- Rune r; /* character */
- int subid; /* sub-expression id for RBRA and LBRA */
- Reinst *right; /* right child of OR */
- };
- union { /* regexp relies on these two being in the same union */
- Reinst *left; /* left child of OR */
- Reinst *next; /* next instruction for CAT & LBRA */
- };
-};
-
-/*
- * Reprogram definition
- */
-struct Reprog{
- Reinst *startinst; /* start pc */
- Reclass class[16]; /* .data */
- Reinst firstinst[5]; /* .text */
-};
-
-extern Reprog *regcomp(char*);
-extern Reprog *regcomplit(char*);
-extern Reprog *regcompnl(char*);
-extern void regerror(char*);
-extern int regexec(Reprog*, char*, Resub*, int);
-extern void regsub(char*, char*, int, Resub*, int);
-extern int rregexec(Reprog*, Rune*, Resub*, int);
-extern void rregsub(Rune*, Rune*, int, Resub*, int);
+Reprog* regcomp(char*);
+Reprog* regcomplit(char*);
+Reprog* regcompnl(char*);
+void regerror(char*);
+int regexec(Reprog*, char*, Resub*, int);
+void regsub(char*, char*, int, Resub*, int);
+int rregexec(Reprog*, Rune*, Resub*, int);
+void rregsub(Rune*, Rune*, int, Resub*, int);
+int reprogfmt(Fmt *);
--- a/sys/src/ape/lib/mkfile
+++ b/sys/src/ape/lib/mkfile
@@ -1,6 +1,6 @@
</$objtype/mkfile
-DIRS=9 ap auth bio bsd bz2 draw fmt l mp net regexp sec utf v z
+DIRS=9 ap auth bio bsd bz2 draw fmt l mp net sec utf v z
none:V:
echo mk all, install, installall, clean, or nuke
--- a/sys/src/ape/lib/regexp/mkfile
+++ /dev/null
@@ -1,15 +1,0 @@
-APE=/sys/src/ape
-<$APE/config
-
-LIB=/$objtype/lib/ape/libregexp.a
-OFILES=regcomp.$O\
- regerror.$O\
- regexec.$O\
- regsub.$O\
- regaux.$O\
- rregexec.$O\
- rregsub.$O\
-
-</sys/src/cmd/mksyslib
-
-CFLAGS=-c -DUTF -D_REGEXP_EXTENSION
--- a/sys/src/ape/lib/regexp/regaux.c
+++ /dev/null
@@ -1,56 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-/*
- * Machine state
- */
-Relist* _relist[2];
-Relist* _reliste[2];
-int _relistsize = LISTINCREMENT;
-
-/*
- * save a new match in mp
- */
-extern void
-_renewmatch(Resub *mp, int ms, Resublist *sp)
-{
- int i;
-
- if(mp==0 || ms<=0)
- return;
- if(mp[0].s.sp==0 || sp->m[0].s.sp<mp[0].s.sp ||
- (sp->m[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){
- for(i=0; i<ms && i<NSUBEXP; i++)
- mp[i] = sp->m[i];
- for(; i<ms; i++)
- mp[i].s.sp = mp[i].e.ep = 0;
- }
-}
-
-/*
- * Note optimization in _renewthread:
- * *lp must be pending when _renewthread called; if *l has been looked
- * at already, the optimization is a bug.
- */
-extern Relist*
-_renewthread(Relist *lp, /* _relist to add to */
- Reinst *ip, /* instruction to add */
- Resublist *sep) /* pointers to subexpressions */
-{
- Relist *p;
-
- for(p=lp; p->inst; p++){
- if(p->inst == ip){
- if((sep)->m[0].s.sp < p->se.m[0].s.sp)
- p->se = *sep;
- return 0;
- }
- }
- p->inst = ip;
- p->se = *sep;
- (++p)->inst = 0;
- return p;
-}
-
--- a/sys/src/ape/lib/regexp/regcomp.c
+++ /dev/null
@@ -1,560 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <setjmp.h>
-#include <string.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-#define TRUE 1
-#define FALSE 0
-
-/*
- * Parser Information
- */
-typedef
-struct Node
-{
- Reinst* first;
- Reinst* last;
-}Node;
-
-#define NSTACK 20
-static Node andstack[NSTACK];
-static Node *andp;
-static int atorstack[NSTACK];
-static int* atorp;
-static int cursubid; /* id of current subexpression */
-static int subidstack[NSTACK]; /* parallel to atorstack */
-static int* subidp;
-static int lastwasand; /* Last token was operand */
-static int nbra;
-static char* exprp; /* pointer to next character in source expression */
-static int lexdone;
-static int nclass;
-static Reclass*classp;
-static Reinst* freep;
-static int errors;
-static wchar_t yyrune; /* last lex'd rune */
-static Reclass*yyclassp; /* last lex'd class */
-
-/* predeclared crap */
-static void operator(int);
-static void pushand(Reinst*, Reinst*);
-static void pushator(int);
-static void evaluntil(int);
-static int bldcclass(void);
-
-static jmp_buf regkaboom;
-
-static void
-rcerror(char *s)
-{
- errors++;
- regerror(s);
- longjmp(regkaboom, 1);
-}
-
-static Reinst*
-newinst(int t)
-{
- freep->type = t;
- freep->l.left = 0;
- freep->r.right = 0;
- return freep++;
-}
-
-static void
-operand(int t)
-{
- Reinst *i;
-
- if(lastwasand)
- operator(CAT); /* catenate is implicit */
- i = newinst(t);
-
- if(t == CCLASS || t == NCCLASS)
- i->r.cp = yyclassp;
- if(t == RUNE)
- i->r.r = yyrune;
-
- pushand(i, i);
- lastwasand = TRUE;
-}
-
-static void
-operator(int t)
-{
- if(t==RBRA && --nbra<0)
- rcerror("unmatched right paren");
- if(t==LBRA){
- if(++cursubid >= NSUBEXP)
- rcerror ("too many subexpressions");
- nbra++;
- if(lastwasand)
- operator(CAT);
- } else
- evaluntil(t);
- if(t != RBRA)
- pushator(t);
- lastwasand = FALSE;
- if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
- lastwasand = TRUE; /* these look like operands */
-}
-
-static void
-regerr2(char *s, int c)
-{
- char buf[100];
- char *cp = buf;
- while(*s)
- *cp++ = *s++;
- *cp++ = c;
- *cp = '\0';
- rcerror(buf);
-}
-
-static void
-cant(char *s)
-{
- char buf[100];
- strcpy(buf, "can't happen: ");
- strcat(buf, s);
- rcerror(buf);
-}
-
-static void
-pushand(Reinst *f, Reinst *l)
-{
- if(andp >= &andstack[NSTACK])
- cant("operand stack overflow");
- andp->first = f;
- andp->last = l;
- andp++;
-}
-
-static void
-pushator(int t)
-{
- if(atorp >= &atorstack[NSTACK])
- cant("operator stack overflow");
- *atorp++ = t;
- *subidp++ = cursubid;
-}
-
-static Node*
-popand(int op)
-{
- Reinst *inst;
-
- if(andp <= &andstack[0]){
- regerr2("missing operand for ", op);
- inst = newinst(NOP);
- pushand(inst,inst);
- }
- return --andp;
-}
-
-static int
-popator(void)
-{
- if(atorp <= &atorstack[0])
- cant("operator stack underflow");
- --subidp;
- return *--atorp;
-}
-
-static void
-evaluntil(int pri)
-{
- Node *op1, *op2;
- Reinst *inst1, *inst2;
-
- while(pri==RBRA || atorp[-1]>=pri){
- switch(popator()){
- default:
- rcerror("unknown operator in evaluntil");
- break;
- case LBRA: /* must have been RBRA */
- op1 = popand('(');
- inst2 = newinst(RBRA);
- inst2->r.subid = *subidp;
- op1->last->l.next = inst2;
- inst1 = newinst(LBRA);
- inst1->r.subid = *subidp;
- inst1->l.next = op1->first;
- pushand(inst1, inst2);
- return;
- case OR:
- op2 = popand('|');
- op1 = popand('|');
- inst2 = newinst(NOP);
- op2->last->l.next = inst2;
- op1->last->l.next = inst2;
- inst1 = newinst(OR);
- inst1->r.right = op1->first;
- inst1->l.left = op2->first;
- pushand(inst1, inst2);
- break;
- case CAT:
- op2 = popand(0);
- op1 = popand(0);
- op1->last->l.next = op2->first;
- pushand(op1->first, op2->last);
- break;
- case STAR:
- op2 = popand('*');
- inst1 = newinst(OR);
- op2->last->l.next = inst1;
- inst1->r.right = op2->first;
- pushand(inst1, inst1);
- break;
- case PLUS:
- op2 = popand('+');
- inst1 = newinst(OR);
- op2->last->l.next = inst1;
- inst1->r.right = op2->first;
- pushand(op2->first, inst1);
- break;
- case QUEST:
- op2 = popand('?');
- inst1 = newinst(OR);
- inst2 = newinst(NOP);
- inst1->l.left = inst2;
- inst1->r.right = op2->first;
- op2->last->l.next = inst2;
- pushand(inst1, inst2);
- break;
- }
- }
-}
-
-static Reprog*
-optimize(Reprog *pp)
-{
- Reinst *inst, *target;
- int size;
- Reprog *npp;
- int diff;
-
- /*
- * get rid of NOOP chains
- */
- for(inst=pp->firstinst; inst->type!=END; inst++){
- target = inst->l.next;
- while(target->type == NOP)
- target = target->l.next;
- inst->l.next = target;
- }
-
- /*
- * The original allocation is for an area larger than
- * necessary. Reallocate to the actual space used
- * and then relocate the code.
- */
- size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
- npp = realloc(pp, size);
- if(npp==0 || npp==pp)
- return pp;
- diff = (char *)npp - (char *)pp;
- freep = (Reinst *)((char *)freep + diff);
- for(inst=npp->firstinst; inst<freep; inst++){
- switch(inst->type){
- case OR:
- case STAR:
- case PLUS:
- case QUEST:
- case CCLASS:
- case NCCLASS:
- *(char **)&inst->r.right += diff;
- break;
- }
- *(char **)&inst->l.left += diff;
- }
- *(char **)&npp->startinst += diff;
- return npp;
-}
-
-#ifdef DEBUG
-static void
-dumpstack(void){
- Node *stk;
- int *ip;
-
- print("operators\n");
- for(ip=atorstack; ip<atorp; ip++)
- print("0%o\n", *ip);
- print("operands\n");
- for(stk=andstack; stk<andp; stk++)
- print("0%o\t0%o\n", stk->first->type, stk->last->type);
-}
-
-static void
-dump(Reprog *pp)
-{
- Reinst *l;
- wchar_t *p;
-
- l = pp->firstinst;
- do{
- print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
- l->l.left-pp->firstinst, l->l.right-pp->firstinst);
- if(l->type == RUNE)
- print("\t%C\n", l->r);
- else if(l->type == CCLASS || l->type == NCCLASS){
- print("\t[");
- if(l->type == NCCLASS)
- print("^");
- for(p = l->r.cp->spans; p < l->r.cp->end; p += 2)
- if(p[0] == p[1])
- print("%C", p[0]);
- else
- print("%C-%C", p[0], p[1]);
- print("]\n");
- } else
- print("\n");
- }while(l++->type);
-}
-#endif
-
-static Reclass*
-newclass(void)
-{
- if(nclass >= NCLASS)
- regerr2("too many character classes; limit", NCLASS+'0');
- return &(classp[nclass++]);
-}
-
-static int
-nextc(wchar_t *rp)
-{
- int n;
-
- if(lexdone){
- *rp = 0;
- return 1;
- }
- n = mbtowc(rp, exprp, MB_CUR_MAX);
- if (n <= 0)
- n = 1;
- exprp += n;
- if(*rp == L'\\'){
- n = mbtowc(rp, exprp, MB_CUR_MAX);
- if (n <= 0)
- n = 1;
- exprp += n;
- return 1;
- }
- if(*rp == 0)
- lexdone = 1;
- return 0;
-}
-
-static int
-lex(int literal, int dot_type)
-{
- int quoted;
-
- quoted = nextc(&yyrune);
- if(literal || quoted){
- if(yyrune == 0)
- return END;
- return RUNE;
- }
-
- switch(yyrune){
- case 0:
- return END;
- case L'*':
- return STAR;
- case L'?':
- return QUEST;
- case L'+':
- return PLUS;
- case L'|':
- return OR;
- case L'.':
- return dot_type;
- case L'(':
- return LBRA;
- case L')':
- return RBRA;
- case L'^':
- return BOL;
- case L'$':
- return EOL;
- case L'[':
- return bldcclass();
- }
- return RUNE;
-}
-
-static int
-bldcclass(void)
-{
- int type;
- wchar_t r[NCCRUNE];
- wchar_t *p, *ep, *np;
- wchar_t rune;
- int quoted;
-
- /* we have already seen the '[' */
- type = CCLASS;
- yyclassp = newclass();
-
- /* look ahead for negation */
- ep = r;
- quoted = nextc(&rune);
- if(!quoted && rune == L'^'){
- type = NCCLASS;
- quoted = nextc(&rune);
- *ep++ = L'\n';
- *ep++ = L'\n';
- }
-
- /* parse class into a set of spans */
- for(; ep<&r[NCCRUNE];){
- if(rune == 0){
- rcerror("malformed '[]'");
- return 0;
- }
- if(!quoted && rune == L']')
- break;
- if(!quoted && rune == L'-'){
- if(ep == r){
- rcerror("malformed '[]'");
- return 0;
- }
- quoted = nextc(&rune);
- if((!quoted && rune == L']') || rune == 0){
- rcerror("malformed '[]'");
- return 0;
- }
- *(ep-1) = rune;
- } else {
- *ep++ = rune;
- *ep++ = rune;
- }
- quoted = nextc(&rune);
- }
-
- /* sort on span start */
- for(p = r; p < ep; p += 2){
- for(np = p; np < ep; np += 2)
- if(*np < *p){
- rune = np[0];
- np[0] = p[0];
- p[0] = rune;
- rune = np[1];
- np[1] = p[1];
- p[1] = rune;
- }
- }
-
- /* merge spans */
- np = yyclassp->spans;
- p = r;
- if(r == ep)
- yyclassp->end = np;
- else {
- np[0] = *p++;
- np[1] = *p++;
- for(; p < ep; p += 2)
- if(p[0] <= np[1]){
- if(p[1] > np[1])
- np[1] = p[1];
- } else {
- np += 2;
- np[0] = p[0];
- np[1] = p[1];
- }
- yyclassp->end = np+2;
- }
-
- return type;
-}
-
-static Reprog*
-regcomp1(char *s, int literal, int dot_type)
-{
- int token;
- Reprog *pp;
-
- /* get memory for the program */
- pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
- if(pp == 0){
- regerror("out of memory");
- return 0;
- }
- freep = pp->firstinst;
- classp = pp->class;
- errors = 0;
-
- if(setjmp(regkaboom))
- goto out;
-
- /* go compile the sucker */
- lexdone = 0;
- exprp = s;
- nclass = 0;
- nbra = 0;
- atorp = atorstack;
- andp = andstack;
- subidp = subidstack;
- lastwasand = FALSE;
- cursubid = 0;
-
- /* Start with a low priority operator to prime parser */
- pushator(START-1);
- while((token = lex(literal, dot_type)) != END){
- if((token&0300) == OPERATOR)
- operator(token);
- else
- operand(token);
- }
-
- /* Close with a low priority operator */
- evaluntil(START);
-
- /* Force END */
- operand(END);
- evaluntil(START);
-#ifdef DEBUG
- dumpstack();
-#endif
- if(nbra)
- rcerror("unmatched left paren");
- --andp; /* points to first and only operand */
- pp->startinst = andp->first;
-#ifdef DEBUG
- dump(pp);
-#endif
- pp = optimize(pp);
-#ifdef DEBUG
- print("start: %d\n", andp->first-pp->firstinst);
- dump(pp);
-#endif
-out:
- if(errors){
- free(pp);
- pp = 0;
- }
- return pp;
-}
-
-extern Reprog*
-regcomp(char *s)
-{
- return regcomp1(s, 0, ANY);
-}
-
-extern Reprog*
-regcomplit(char *s)
-{
- return regcomp1(s, 1, ANY);
-}
-
-extern Reprog*
-regcompnl(char *s)
-{
- return regcomp1(s, 0, ANYNL);
-}
--- a/sys/src/ape/lib/regexp/regcomp.h
+++ /dev/null
@@ -1,61 +1,0 @@
-/*
- * substitution list
- */
-typedef struct Resublist Resublist;
-struct Resublist
-{
- Resub m[32];
-};
-
-/* max subexpressions per program */
-Resublist ReSuBlIsT;
-#define NSUBEXP (sizeof(ReSuBlIsT.m)/sizeof(Resub))
-
-/* max character classes per program */
-Reprog RePrOg;
-#define NCLASS (sizeof(RePrOg.class)/sizeof(Reclass))
-
-/* max rune ranges per character class */
-#define NCCRUNE (sizeof(Reclass)/sizeof(wchar_t))
-
-/*
- * Actions and Tokens (Reinst types)
- *
- * 02xx are operators, value == precedence
- * 03xx are tokens, i.e. operands for operators
- */
-#define RUNE 0177
-#define OPERATOR 0200 /* Bitmask of all operators */
-#define START 0200 /* Start, used for marker on stack */
-#define RBRA 0201 /* Right bracket, ) */
-#define LBRA 0202 /* Left bracket, ( */
-#define OR 0203 /* Alternation, | */
-#define CAT 0204 /* Concatentation, implicit operator */
-#define STAR 0205 /* Closure, * */
-#define PLUS 0206 /* a+ == aa* */
-#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */
-#define ANY 0300 /* Any character except newline, . */
-#define ANYNL 0301 /* Any character including newline, . */
-#define NOP 0302 /* No operation, internal use only */
-#define BOL 0303 /* Beginning of line, ^ */
-#define EOL 0304 /* End of line, $ */
-#define CCLASS 0305 /* Character class, [] */
-#define NCCLASS 0306 /* Negated character class, [] */
-#define END 0377 /* Terminate: match found */
-
-/*
- * regexec execution lists
- */
-#define LISTINCREMENT 8
-typedef struct Relist Relist;
-struct Relist
-{
- Reinst *inst; /* Reinstruction of the thread */
- Resublist se; /* matched subexpressions in this thread */
-};
-extern Relist* _relist[2];
-extern Relist* _reliste[2];
-extern int _relistsize;
-
-extern Relist* _renewthread(Relist*, Reinst*, Resublist*);
-extern void _renewmatch(Resub*, int, Resublist*);
--- a/sys/src/ape/lib/regexp/regerror.c
+++ /dev/null
@@ -1,16 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include "regexp.h"
-
-void
-regerror(char *s)
-{
- char buf[132];
-
- strcpy(buf, "regerror: ");
- strcat(buf, s);
- strcat(buf, "\n");
- fwrite(buf, 1, strlen(buf), stderr);
- exit(1);
-}
--- a/sys/src/ape/lib/regexp/regexec.c
+++ /dev/null
@@ -1,191 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-static Resublist sempty; /* empty set of matches */
-
-/*
- * return 0 if no match
- * >0 if a match
- * <0 if we ran out of _relist space
- */
-static int
-regexec1(Reprog *progp, /* program to run */
- char *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- char *starts,
- char *eol,
- wchar_t startchar)
-{
- int flag=0;
- Reinst *inst;
- Relist *tlp;
- char *s;
- int i, checkstart;
- wchar_t r, *rp, *ep;
- int n;
- Relist* tl; /* This list, next list */
- Relist* nl;
- Relist* tle; /* ends of this and next list */
- Relist* nle;
- int match;
-
- match = 0;
- checkstart = startchar;
- sempty.m[0].s.sp = 0;
- if(mp!=0)
- for(i=0; i<ms; i++)
- mp[i].s.sp = mp[i].e.ep = 0;
- _relist[0][0].inst = _relist[1][0].inst = 0;
-
- /* Execute machine once for each character, including terminal NUL */
- s = starts;
- do{
- /* fast check for first char */
- r = *(unsigned char*)s;
- if(checkstart && r != startchar){
- s++;
- continue;
- }
-
- if(r < Runeself)
- n = 1;
- else {
- n = mbtowc(&r, s, MB_CUR_MAX);
- if (n <= 0)
- n = 1;
- }
-
- /* switch run lists */
- tl = _relist[flag];
- tle = _reliste[flag];
- nl = _relist[flag^=1];
- nle = _reliste[flag];
- nl->inst = 0;
-
- /* Add first instruction to current list */
- if(match == 0){
- sempty.m[0].s.sp = s;
- _renewthread(tl, progp->startinst, &sempty);
- }
-
- /* Execute machine until current list is empty */
- for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
- if(s == eol)
- break;
-
- for(inst = tlp->inst; ; inst = inst->l.next){
- switch(inst->type){
- case RUNE: /* regular character */
- if(inst->r.r == r)
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case LBRA:
- tlp->se.m[inst->r.subid].s.sp = s;
- continue;
- case RBRA:
- tlp->se.m[inst->r.subid].e.ep = s;
- continue;
- case ANY:
- if(r != '\n')
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case ANYNL:
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case BOL:
- if(s == bol || *(s-1) == '\n')
- continue;
- break;
- case EOL:
- if(r == 0 || r == '\n')
- continue;
- break;
- case CCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- }
- break;
- case NCCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1])
- break;
- if(rp == ep)
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case OR:
- /* evaluate right choice later */
- if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
- return -1;
- /* efficiency: advance and re-evaluate */
- continue;
- case END: /* Match! */
- match = 1;
- tlp->se.m[0].e.ep = s;
- if(mp != 0)
- _renewmatch(mp, ms, &tlp->se);
- break;
- }
- break;
- }
- }
- checkstart = startchar && nl->inst==0;
- s += n;
- }while(r);
- return match;
-}
-
-extern int
-regexec(Reprog *progp, /* program to run */
- char *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements at mp */
-{
- char *starts; /* where to start match */
- char *eol; /* where to end match */
- wchar_t startchar;
- int rv;
-
- /*
- * use user-specified starting/ending location if specified
- */
- starts = bol;
- eol = 0;
- if(mp && ms>0){
- if(mp->s.sp)
- starts = mp->s.sp;
- if(mp->e.ep)
- eol = mp->e.ep;
- }
- startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself)
- ? progp->startinst->r.r : 0;
-
- /* keep trying till we have enough list space to terminate */
- for(;;){
- if(_relist[0] == 0){
- _relist[0] = malloc(2*_relistsize*sizeof(Relist));
- _relist[1] = _relist[0] + _relistsize;
- _reliste[0] = _relist[0] + _relistsize - 1;
- _reliste[1] = _relist[1] + _relistsize - 1;
- if(_relist[0] == 0)
- regerror("_relist overflow");
- }
- rv = regexec1(progp, bol, mp, ms, starts, eol, startchar);
- if(rv >= 0)
- return rv;
- free(_relist[0]);
- _relist[0] = 0;
- _relistsize += LISTINCREMENT;
- }
-}
--- a/sys/src/ape/lib/regexp/regsub.c
+++ /dev/null
@@ -1,64 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-
-/* substitute into one string using the matches from the last regexec() */
-extern void
-regsub(char *sp, /* source string */
- char *dp, /* destination string */
- int dlen,
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements pointed to by mp */
-{
- char *ssp, *ep;
- int i;
-
- ep = dp+dlen-1;
- while(*sp != '\0'){
- if(*sp == '\\'){
- switch(*++sp){
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- i = *sp-'0';
- if(mp[i].s.sp != 0 && mp!=0 && ms>i)
- for(ssp = mp[i].s.sp;
- ssp < mp[i].e.ep;
- ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- break;
- case '\\':
- if(dp < ep)
- *dp++ = '\\';
- break;
- case '\0':
- sp--;
- break;
- default:
- if(dp < ep)
- *dp++ = *sp;
- break;
- }
- }else if(*sp == '&'){
- if(mp[0].s.sp != 0 && mp!=0 && ms>0)
- if(mp[0].s.sp != 0)
- for(ssp = mp[0].s.sp;
- ssp < mp[0].e.ep; ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- }else{
- if(dp < ep)
- *dp++ = *sp;
- }
- sp++;
- }
- *dp = '\0';
-}
--- a/sys/src/ape/lib/regexp/rregexec.c
+++ /dev/null
@@ -1,181 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-static Resublist sempty; /* empty set of matches */
-
-/*
- * return 0 if no match
- * >0 if a match
- * <0 if we ran out of _relist space
- */
-static int
-rregexec1(Reprog *progp, /* program to run */
- wchar_t *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- wchar_t *starts,
- wchar_t *eol,
- wchar_t startchar)
-{
- int flag=0;
- Reinst *inst;
- Relist *tlp;
- wchar_t *s;
- int i, checkstart;
- wchar_t r, *rp, *ep;
- int n;
- Relist* tl; /* This list, next list */
- Relist* nl;
- Relist* tle; /* ends of this and next list */
- Relist* nle;
- int match;
-
- match = 0;
- checkstart = startchar;
- sempty.m[0].s.rsp = 0;
- if(mp!=0)
- for(i=0; i<ms; i++)
- mp[i].s.rsp = mp[i].e.rep = 0;
- _relist[0][0].inst = _relist[1][0].inst = 0;
-
- /* Execute machine once for each character, including terminal NUL */
- s = starts;
- do{
- r = *s;
-
- /* fast check for first char */
- if(checkstart && r!=startchar){
- s++;
- continue;
- }
-
- /* switch run lists */
- tl = _relist[flag];
- tle = _reliste[flag];
- nl = _relist[flag^=1];
- nle = _reliste[flag];
- nl->inst = 0;
-
- /* Add first instruction to current list */
- sempty.m[0].s.rsp = s;
- _renewthread(tl, progp->startinst, &sempty);
-
- /* Execute machine until current list is empty */
- for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
- if(s == eol)
- break;
-
- for(inst=tlp->inst; ; inst = inst->l.next){
- switch(inst->type){
- case RUNE: /* regular character */
- if(inst->r.r == r)
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case LBRA:
- tlp->se.m[inst->r.subid].s.rsp = s;
- continue;
- case RBRA:
- tlp->se.m[inst->r.subid].e.rep = s;
- continue;
- case ANY:
- if(r != '\n')
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case ANYNL:
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case BOL:
- if(s == bol || *(s-1) == '\n')
- continue;
- break;
- case EOL:
- if(r == 0 || r == '\n')
- continue;
- break;
- case CCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- }
- break;
- case NCCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1])
- break;
- if(rp == ep)
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case OR:
- /* evaluate right choice later */
- if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
- return -1;
- /* efficiency: advance and re-evaluate */
- continue;
- case END: /* Match! */
- match = 1;
- tlp->se.m[0].e.rep = s;
- if(mp != 0)
- _renewmatch(mp, ms, &tlp->se);
- break;
- }
- break;
- }
- }
- checkstart = startchar && nl->inst==0;
- s++;
- }while(r);
- return match;
-}
-
-extern int
-rregexec(Reprog *progp, /* program to run */
- wchar_t *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements at mp */
-{
- wchar_t *starts; /* where to start match */
- wchar_t *eol; /* where to end match */
- wchar_t startchar;
- int rv;
-
- /*
- * use user-specified starting/ending location if specified
- */
- starts = bol;
- eol = 0;
- if(mp && ms>0){
- if(mp->s.rsp)
- starts = mp->s.rsp;
- if(mp->e.rep)
- eol = mp->e.rep;
- }
- startchar = progp->startinst->type == RUNE ? progp->startinst->r.r : 0;
-
- /* keep trying till we have enough list space to terminate */
- for(;;){
- if(_relist[0] == 0){
- _relist[0] = malloc(2*_relistsize*sizeof(Relist));
- _relist[1] = _relist[0] + _relistsize;
- _reliste[0] = _relist[0] + _relistsize - 1;
- _reliste[1] = _relist[1] + _relistsize - 1;
- if(_relist[0] == 0)
- regerror("_relist overflow");
- }
- rv = rregexec1(progp, bol, mp, ms, starts, eol, startchar);
- if(rv >= 0)
- return rv;
- free(_relist[0]);
- _relist[0] = 0;
- _relistsize += LISTINCREMENT;
- }
-}
--- a/sys/src/ape/lib/regexp/rregsub.c
+++ /dev/null
@@ -1,64 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-
-/* substitute into one string using the matches from the last regexec() */
-extern void
-rregsub(wchar_t *sp, /* source string */
- wchar_t *dp, /* destination string */
- int dlen,
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements pointed to by mp */
-{
- wchar_t *ssp, *ep;
- int i;
-
- ep = dp+(dlen/sizeof(wchar_t))-1;
- while(*sp != '\0'){
- if(*sp == '\\'){
- switch(*++sp){
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- i = *sp-'0';
- if(mp[i].s.rsp != 0 && mp!=0 && ms>i)
- for(ssp = mp[i].s.rsp;
- ssp < mp[i].e.rep;
- ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- break;
- case '\\':
- if(dp < ep)
- *dp++ = '\\';
- break;
- case '\0':
- sp--;
- break;
- default:
- if(dp < ep)
- *dp++ = *sp;
- break;
- }
- }else if(*sp == '&'){
- if(mp[0].s.rsp != 0 && mp!=0 && ms>0)
- if(mp[0].s.rsp != 0)
- for(ssp = mp[0].s.rsp;
- ssp < mp[0].e.rep; ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- }else{
- if(dp < ep)
- *dp++ = *sp;
- }
- sp++;
- }
- *dp = '\0';
-}
--- a/sys/src/cmd/awk/awk.h
+++ b/sys/src/cmd/awk/awk.h
@@ -6,20 +6,18 @@
typedef double Awkfloat;
-/* unsigned char is more trouble than it's worth */
+#define xfree(a) { if ((a) != nil) { free((a)); (a) = nil; } }
-typedef unsigned char uschar;
-
-#define xfree(a) { if ((a) != NULL) { free((char *) a); a = NULL; } }
-
#define DEBUG
#ifdef DEBUG
/* uses have to be doubly parenthesized */
-# define dprintf(x) if (dbg) printf x
+# define dprint(x) if (dbg) print x
#else
-# define dprintf(x)
+# define dprint(x)
#endif
+#define FOPEN_MAX 40 /* max number of open files */
+
extern char errbuf[];
extern int compile_time; /* 1 if compiling, 0 if running */
@@ -28,6 +26,10 @@
#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */
extern int recsize; /* size of current record, orig RECSIZE */
+extern Biobuf stdin;
+extern Biobuf stdout;
+extern Biobuf stderr;
+
extern char **FS;
extern char **RS;
extern char **ORS;
@@ -56,8 +58,8 @@
/* Cell: all information about a variable or constant */
typedef struct Cell {
- uschar ctype; /* OCELL, OBOOL, OJUMP, etc. */
- uschar csub; /* CCON, CTEMP, CFLD, etc. */
+ uchar ctype; /* OCELL, OBOOL, OJUMP, etc. */
+ uchar csub; /* CCON, CTEMP, CFLD, etc. */
char *nval; /* name, for variables only */
char *sval; /* string value */
Awkfloat fval; /* value as number */
@@ -66,7 +68,7 @@
} Cell;
typedef struct Array { /* symbol table array */
- int nelem; /* elements in table right now */
+ int nelemt; /* elements in table right now */
int size; /* size of tab */
Cell **tab; /* hash table pointers */
} Array;
--- a/sys/src/cmd/awk/awkgram.y
+++ b/sys/src/cmd/awk/awkgram.y
@@ -23,8 +23,9 @@
****************************************************************/
%{
-#include <stdio.h>
-#include <string.h>
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
#include "awk.h"
#define makedfa(a,b) compre(a)
--- a/sys/src/cmd/awk/lex.c
+++ b/sys/src/cmd/awk/lex.c
@@ -22,10 +22,10 @@
THIS SOFTWARE.
****************************************************************/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <u.h>
+#include <libc.h>
#include <ctype.h>
+#include <bio.h>
#include "awk.h"
#include "y.tab.h"
@@ -90,9 +90,8 @@
{ "while", WHILE, WHILE },
};
-#define DEBUG
#ifdef DEBUG
-#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
+#define RET(x) { if(dbg)print("lex %s\n", tokname(x)); return(x); }
#else
#define RET(x) return(x)
#endif
@@ -170,7 +169,7 @@
static char *buf = 0;
static int bufsize = 500;
- if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
+ if (buf == 0 && (buf = (char *) malloc(bufsize)) == nil)
FATAL( "out of space in yylex" );
if (sc) {
sc = 0;
@@ -353,7 +352,7 @@
static char *buf = 0;
static int bufsz = 500;
- if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
+ if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
FATAL("out of space for strings");
for (bp = buf; (c = input()) != '"'; ) {
if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
@@ -401,7 +400,7 @@
}
*px = 0;
unput(c);
- sscanf(xbuf, "%x", &n);
+ n = strtol(xbuf, nil, 16);
*bp++ = n;
break;
}
@@ -497,7 +496,7 @@
static int bufsz = 500;
char *bp;
- if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
+ if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
FATAL("out of space for rex expr");
bp = buf;
for ( ; (c = input()) != '/' && c != 0; ) {
@@ -526,7 +525,7 @@
char *ep = ebuf;
char yysbuf[100]; /* pushback buffer */
char *yysptr = yysbuf;
-FILE *yyin = 0;
+Biobuf *yyin;
int input(void) /* get next lexical input character */
{
@@ -535,7 +534,7 @@
if (yysptr > yysbuf)
c = *--yysptr;
- else if (lexprog != NULL) { /* awk '...' */
+ else if (lexprog != nil) { /* awk '...' */
if ((c = *lexprog) != 0)
lexprog++;
} else /* awk -f ... */
@@ -542,7 +541,7 @@
c = pgetc();
if (c == '\n')
lineno++;
- else if (c == EOF)
+ else if (c == Beof)
c = 0;
if (ep >= ebuf + sizeof ebuf)
ep = ebuf;
--- a/sys/src/cmd/awk/lib.c
+++ b/sys/src/cmd/awk/lib.c
@@ -22,17 +22,14 @@
THIS SOFTWARE.
****************************************************************/
-#define DEBUG
-#include <stdio.h>
-#include <string.h>
+#include <u.h>
+#include <libc.h>
#include <ctype.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <stdarg.h>
+#include <bio.h>
#include "awk.h"
#include "y.tab.h"
-FILE *infile = NULL;
+Biobuf *infile;
char *file = "";
char *record;
int recsize = RECSIZE;
@@ -50,10 +47,10 @@
int lastfld = 0; /* last used field */
int argno = 1; /* current input argument number */
-extern Awkfloat *ARGC;
+extern Awkfloat *AARGC;
-static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE };
-static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
+static Cell dollar0 = { OCELL, CFLD, nil, "", 0.0, REC|STR|DONTFREE };
+static Cell dollar1 = { OCELL, CFLD, nil, "", 0.0, FLD|STR|DONTFREE };
void recinit(unsigned int n)
{
@@ -60,7 +57,7 @@
record = (char *) malloc(n);
fields = (char *) malloc(n);
fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *));
- if (record == NULL || fields == NULL || fldtab == NULL)
+ if (record == nil || fields == nil || fldtab == nil)
FATAL("out of space for $0 and fields");
fldtab[0] = (Cell *) malloc(sizeof (Cell));
*fldtab[0] = dollar0;
@@ -76,10 +73,10 @@
for (i = n1; i <= n2; i++) {
fldtab[i] = (Cell *) malloc(sizeof (struct Cell));
- if (fldtab[i] == NULL)
+ if (fldtab[i] == nil)
FATAL("out of space in makefields %d", i);
*fldtab[i] = dollar1;
- sprintf(temp, "%d", i);
+ sprint(temp, "%d", i);
fldtab[i]->nval = tostring(temp);
}
}
@@ -89,7 +86,7 @@
int i;
char *p;
- for (i = 1; i < *ARGC; i++) {
+ for (i = 1; i < *AARGC; i++) {
if (!isclvar(p = getargv(i))) { /* find 1st real filename */
setsval(lookup("FILENAME", symtab), getargv(i));
return;
@@ -97,7 +94,7 @@
setclvar(p); /* a commandline assignment before filename */
argno++;
}
- infile = stdin; /* no filenames, so use stdin */
+ infile = &stdin; /* no filenames, so use &stdin */
}
int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
@@ -111,16 +108,16 @@
firsttime = 0;
initgetrec();
}
- dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
- *RS, *FS, *ARGC, *FILENAME) );
+ dprint( ("RS=<%s>, FS=<%s>, AARGC=%g, FILENAME=%s\n",
+ *RS, *FS, *AARGC, *FILENAME) );
if (isrecord) {
donefld = 0;
donerec = 1;
}
buf[0] = 0;
- while (argno < *ARGC || infile == stdin) {
- dprintf( ("argno=%d, file=|%s|\n", argno, file) );
- if (infile == NULL) { /* have to open a new file */
+ while (argno < *AARGC || infile == &stdin) {
+ dprint( ("argno=%d, file=|%s|\n", argno, file) );
+ if (infile == nil) { /* have to open a new file */
file = getargv(argno);
if (*file == '\0') { /* it's been zapped */
argno++;
@@ -132,10 +129,10 @@
continue;
}
*FILENAME = file;
- dprintf( ("opening file %s\n", file) );
+ dprint( ("opening file %s\n", file) );
if (*file == '-' && *(file+1) == '\0')
- infile = stdin;
- else if ((infile = fopen(file, "r")) == NULL)
+ infile = &stdin;
+ else if ((infile = Bopen(file, OREAD)) == nil)
FATAL("can't open file %s", file);
setfval(fnrloc, 0.0);
}
@@ -157,10 +154,10 @@
*pbufsize = bufsize;
return 1;
}
- /* EOF arrived on this file; set up next */
- if (infile != stdin)
- fclose(infile);
- infile = NULL;
+ /* Beof arrived on this file; set up next */
+ if (infile != &stdin)
+ Bterm(infile);
+ infile = nil;
argno++;
}
*pbuf = buf;
@@ -170,13 +167,13 @@
void nextfile(void)
{
- if (infile != stdin)
- fclose(infile);
- infile = NULL;
+ if (infile != &stdin)
+ Bterm(infile);
+ infile = nil;
argno++;
}
-int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
+int readrec(char **pbuf, int *pbufsize, Biobuf *inf) /* read one record into buf */
{
int sep, c;
char *rr, *buf = *pbuf;
@@ -187,21 +184,21 @@
strcpy(inputFS, *FS); /* for subsequent field splitting */
if ((sep = **RS) == 0) {
sep = '\n';
- while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
+ while ((c=Bgetc(inf)) == '\n' && c != Beof) /* skip leading \n's */
;
- if (c != EOF)
- ungetc(c, inf);
+ if (c != Beof)
+ Bungetc(inf);
}
for (rr = buf; ; ) {
- for (; (c=getc(inf)) != sep && c != EOF; ) {
+ for (; (c=Bgetc(inf)) != sep && c != Beof; ) {
if (rr-buf+1 > bufsize)
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
FATAL("input record `%.30s...' too long", buf);
*rr++ = c;
}
- if (**RS == sep || c == EOF)
+ if (**RS == sep || c == Beof)
break;
- if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
+ if ((c = Bgetc(inf)) == '\n' || c == Beof) /* 2 in a row */
break;
if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
FATAL("input record `%.30s...' too long", buf);
@@ -211,10 +208,10 @@
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
FATAL("input record `%.30s...' too long", buf);
*rr = 0;
- dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
+ dprint( ("readrec saw <%s>, returns %d\n", buf, c == Beof && rr == buf ? 0 : 1) );
*pbuf = buf;
*pbufsize = bufsize;
- return c == EOF && rr == buf ? 0 : 1;
+ return c == Beof && rr == buf ? 0 : 1;
}
char *getargv(int n) /* get ARGV[n] */
@@ -223,10 +220,10 @@
char *s, temp[50];
extern Array *ARGVtab;
- sprintf(temp, "%d", n);
+ sprint(temp, "%d", n);
x = setsymtab(temp, "", 0.0, STR, ARGVtab);
s = getsval(x);
- dprintf( ("getargv(%d) returns |%s|\n", n, s) );
+ dprint( ("getargv(%d) returns |%s|\n", n, s) );
return s;
}
@@ -245,7 +242,7 @@
q->fval = atof(q->sval);
q->tval |= NUM;
}
- dprintf( ("command line set %s to |%s|\n", s, p) );
+ dprint( ("command line set %s to |%s|\n", s, p) );
}
@@ -265,7 +262,7 @@
n = strlen(r);
if (n > fieldssize) {
xfree(fields);
- if ((fields = (char *) malloc(n+1)) == NULL)
+ if ((fields = (char *) malloc(n+1)) == nil)
FATAL("out of space for fields in fldbld %d", n);
fieldssize = n;
}
@@ -273,7 +270,7 @@
i = 0; /* number of fields accumulated here */
if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS);
- } else if ((sep = *inputFS) == ' ') { /* default whitespace */
+ } else if (*inputFS == ' ') { /* default whitespace */
for (i = 0; ; ) {
while (*r == ' ' || *r == '\t' || *r == '\n')
r++;
@@ -339,7 +336,7 @@
if (dbg) {
for (j = 0; j <= lastfld; j++) {
p = fldtab[j];
- printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
+ print("field %d (%s): |%s|\n", j, p->nval, p->sval);
}
}
}
@@ -383,7 +380,7 @@
if (n > nf)
nf = n;
fldtab = (Cell **) realloc(fldtab, (nf+1) * (sizeof (struct Cell *)));
- if (fldtab == NULL)
+ if (fldtab == nil)
FATAL("out of space creating %d fields", nf);
makefields(nfields+1, nf);
nfields = nf;
@@ -395,12 +392,12 @@
/* the fields are all stored in this one array with \0's */
char *fr;
void *p;
- int i, tempstat, n;
+ int i, n;
n = strlen(rec);
if (n > fieldssize) {
xfree(fields);
- if ((fields = (char *) malloc(n+1)) == NULL)
+ if ((fields = (char *) malloc(n+1)) == nil)
FATAL("out of space for fields in refldbld %d", n);
fieldssize = n;
}
@@ -409,7 +406,7 @@
if (*rec == '\0')
return 0;
p = compre(fs);
- dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
+ dprint( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
for (i = 1; ; i++) {
if (i > nfields)
growfldtab(i);
@@ -417,15 +414,15 @@
xfree(fldtab[i]->sval);
fldtab[i]->tval = FLD | STR | DONTFREE;
fldtab[i]->sval = fr;
- dprintf( ("refldbld: i=%d\n", i) );
+ dprint( ("refldbld: i=%d\n", i) );
if (nematch(p, rec, rec)) {
- dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
+ dprint( ("match %s (%d chars)\n", patbeg, patlen) );
strncpy(fr, rec, patbeg-rec);
fr += patbeg - rec + 1;
*(fr-1) = '\0';
rec = patbeg + patlen;
} else {
- dprintf( ("no match %s\n", rec) );
+ dprint( ("no match %s\n", rec) );
strcpy(fr, rec);
break;
}
@@ -457,7 +454,7 @@
if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
FATAL("built giant record `%.30s...'", record);
*r = '\0';
- dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
+ dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
if (freeable(fldtab[0]))
xfree(fldtab[0]->sval);
@@ -464,8 +461,8 @@
fldtab[0]->tval = REC | STR | DONTFREE;
fldtab[0]->sval = record;
- dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
- dprintf( ("recbld = |%s|\n", record) );
+ dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
+ dprint( ("recbld = |%s|\n", record) );
donerec = 1;
}
@@ -484,24 +481,26 @@
if (been_here++ > 2)
return;
- fprintf(stderr, "%s: ", cmdname);
+ Bprint(&stderr, "%s: ", cmdname);
va_start(varg, fmt);
- vfprintf(stderr, fmt, varg);
+ Bvprint(&stderr, fmt, varg);
va_end(varg);
- if(compile_time == 1 && cursource() != NULL)
- fprintf(stderr, " at %s:%d", cursource(), lineno);
+ if(compile_time == 1 && cursource() != nil)
+ Bprint(&stderr, " at %s:%d", cursource(), lineno);
else
- fprintf(stderr, " at line %d", lineno);
- if (curfname != NULL)
- fprintf(stderr, " in function %s", curfname);
- fprintf(stderr, "\n");
+ Bprint(&stderr, " at line %d", lineno);
+ if (curfname != nil)
+ Bprint(&stderr, " in function %s", curfname);
+ Bprint(&stderr, "\n");
errorflag = 2;
eprint();
}
-void fpecatch(int n)
+int handler(void *, char *err)
{
- FATAL("floating point exception %d", n);
+ Bflush(&stdout);
+ fprint(2, "%s\n", err);
+ return 0;
}
extern int bracecnt, brackcnt, parencnt;
@@ -513,7 +512,7 @@
if (beenhere++)
return;
- while ((c = input()) != EOF && c != '\0')
+ while ((c = input()) != Beof && c != '\0')
bclass(c);
bcheck2(bracecnt, '{', '}');
bcheck2(brackcnt, '[', ']');
@@ -520,16 +519,16 @@
bcheck2(parencnt, '(', ')');
}
-void bcheck2(int n, int c1, int c2)
+void bcheck2(int n, int, int c2)
{
if (n == 1)
- fprintf(stderr, "\tmissing %c\n", c2);
+ Bprint(&stderr, "\tmissing %c\n", c2);
else if (n > 1)
- fprintf(stderr, "\t%d missing %c's\n", n, c2);
+ Bprint(&stderr, "\t%d missing %c's\n", n, c2);
else if (n == -1)
- fprintf(stderr, "\textra %c\n", c2);
+ Bprint(&stderr, "\textra %c\n", c2);
else if (n < -1)
- fprintf(stderr, "\t%d extra %c's\n", -n, c2);
+ Bprint(&stderr, "\t%d extra %c's\n", -n, c2);
}
void FATAL(char *fmt, ...)
@@ -537,15 +536,15 @@
extern char *cmdname;
va_list varg;
- fflush(stdout);
- fprintf(stderr, "%s: ", cmdname);
+ Bflush(&stdout);
+ Bprint(&stderr, "%s: ", cmdname);
va_start(varg, fmt);
- vfprintf(stderr, fmt, varg);
+ Bvprint(&stderr, fmt, varg);
va_end(varg);
error();
if (dbg > 1) /* core dump if serious debugging on */
abort();
- exit(2);
+ exits("FATAL");
}
void WARNING(char *fmt, ...)
@@ -553,10 +552,10 @@
extern char *cmdname;
va_list varg;
- fflush(stdout);
- fprintf(stderr, "%s: ", cmdname);
+ Bflush(&stdout);
+ Bprint(&stderr, "%s: ", cmdname);
va_start(varg, fmt);
- vfprintf(stderr, fmt, varg);
+ Bvprint(&stderr, fmt, varg);
va_end(varg);
error();
}
@@ -566,13 +565,13 @@
extern Node *curnode;
int line;
- fprintf(stderr, "\n");
+ Bprint(&stderr, "\n");
if (compile_time != 2 && NR && *NR > 0) {
if (strcmp(*FILENAME, "-") != 0)
- fprintf(stderr, " input record %s:%d", *FILENAME, (int) (*FNR));
+ Bprint(&stderr, " input record %s:%d", *FILENAME, (int) (*FNR));
else
- fprintf(stderr, " input record number %d", (int) (*FNR));
- fprintf(stderr, "\n");
+ Bprint(&stderr, " input record number %d", (int) (*FNR));
+ Bprint(&stderr, "\n");
}
if (compile_time != 2 && curnode)
line = curnode->lineno;
@@ -580,14 +579,14 @@
line = lineno;
else
line = -1;
- if (compile_time == 1 && cursource() != NULL){
+ if (compile_time == 1 && cursource() != nil){
if(line >= 0)
- fprintf(stderr, " source %s:%d", cursource(), line);
+ Bprint(&stderr, " source %s:%d", cursource(), line);
else
- fprintf(stderr, " source file %s", cursource());
+ Bprint(&stderr, " source file %s", cursource());
}else if(line >= 0)
- fprintf(stderr, " source line %d", line);
- fprintf(stderr, "\n");
+ Bprint(&stderr, " source line %d", line);
+ Bprint(&stderr, "\n");
eprint();
}
@@ -607,23 +606,23 @@
;
while (*p == '\n')
p++;
- fprintf(stderr, " context is\n\t");
+ Bprint(&stderr, " context is\n\t");
for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
;
for ( ; p < q; p++)
if (*p)
- putc(*p, stderr);
- fprintf(stderr, " >>> ");
+ Bputc(&stderr, *p);
+ Bprint(&stderr, " >>> ");
for ( ; p < ep; p++)
if (*p)
- putc(*p, stderr);
- fprintf(stderr, " <<< ");
+ Bputc(&stderr, *p);
+ Bprint(&stderr, " <<< ");
if (*ep)
- while ((c = input()) != '\n' && c != '\0' && c != EOF) {
- putc(c, stderr);
+ while ((c = input()) != '\n' && c != '\0' && c != Beof) {
+ Bputc(&stderr, c);
bclass(c);
}
- putc('\n', stderr);
+ Bputc(&stderr, '\n');
ep = ebuf;
}
@@ -642,12 +641,10 @@
double errcheck(double x, char *s)
{
- if (errno == EDOM) {
- errno = 0;
+ if (isNaN(x)) {
WARNING("%s argument out of domain", s);
x = 1;
- } else if (errno == ERANGE) {
- errno = 0;
+ } else if (isInf(x, 1) || isInf(x, -1)) {
WARNING("%s result out of range", s);
x = 1;
}
@@ -668,7 +665,6 @@
/* strtod is supposed to be a proper test of what's a valid number */
-#include <math.h>
int is_number(char *s)
{
double r;
@@ -699,9 +695,8 @@
return 0; /* can't be a number */
}
- errno = 0;
r = strtod(s, &ep);
- if (ep == s || r == HUGE_VAL || errno == ERANGE)
+ if (ep == s || isInf(r, 1) || isInf(r, -1))
return 0;
while (*ep == ' ' || *ep == '\t' || *ep == '\n')
ep++;
--- a/sys/src/cmd/awk/main.c
+++ b/sys/src/cmd/awk/main.c
@@ -24,21 +24,21 @@
char *version = "version 19990602";
-#define DEBUG
-#include <stdio.h>
-#include <ctype.h>
-#include <stdlib.h>
-#include <string.h>
-#include <signal.h>
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
#include "awk.h"
#include "y.tab.h"
-extern char **environ;
extern int nfields;
+Biobuf stdin;
+Biobuf stdout;
+Biobuf stderr;
+
int dbg = 0;
char *cmdname; /* gets argv[0] for error messages */
-extern FILE *yyin; /* lex input file */
+extern Biobuf *yyin; /* lex input file */
char *lexprog; /* points to program argument if it exists */
extern int errorflag; /* non-zero if any syntax errors; set by yyerror */
int compile_time = 2; /* for error printing: */
@@ -50,18 +50,23 @@
int safe = 0; /* 1 => "safe" mode */
-int main(int argc, char *argv[])
+void main(int argc, char *argv[])
{
- char *fs = NULL, *marg;
+ char *fs = nil, *marg;
int temp;
+ Binit(&stdin, 0, OREAD);
+ Binit(&stdout, 1, OWRITE);
+ Binit(&stderr, 2, OWRITE);
+
cmdname = argv[0];
if (argc == 1) {
- fprintf(stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname);
- exit(1);
+ Bprint(&stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname);
+ exits("usage");
}
- signal(SIGFPE, fpecatch);
- yyin = NULL;
+
+ atnotify(handler, 1);
+ yyin = nil;
symtab = makesymtab(NSYMTAB);
while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
if (strcmp(argv[1], "--") == 0) { /* explicit end of args */
@@ -94,7 +99,7 @@
else if (argc > 1 && argv[1][0] != 0)
fs = &argv[1][0];
}
- if (fs == NULL || *fs == '\0')
+ if (fs == nil || *fs == '\0')
WARNING("field separator FS is empty");
break;
case 'v': /* -v a=1 to be done NOW. one -v for each */
@@ -120,11 +125,11 @@
dbg = atoi(&argv[1][2]);
if (dbg == 0)
dbg = 1;
- printf("awk %s\n", version);
+ print("awk %s\n", version);
break;
case 'V': /* added for exptools "standard" */
- printf("awk %s\n", version);
- exit(0);
+ print("awk %s\n", version);
+ exits(0);
break;
default:
WARNING("unknown option %s ignored", argv[1]);
@@ -137,10 +142,10 @@
if (npfile == 0) { /* no -f; first argument is program */
if (argc <= 1) {
if (dbg)
- exit(0);
+ exits(0);
FATAL("no program given");
}
- dprintf( ("program = |%s|\n", argv[1]) );
+ dprint( ("program = |%s|\n", argv[1]) );
lexprog = argv[1];
argc--;
argv++;
@@ -149,20 +154,20 @@
syminit();
compile_time = 1;
argv[0] = cmdname; /* put prog name at front of arglist */
- dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
+ dprint( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
arginit(argc, argv);
- if (!safe)
- envinit(environ);
yyparse();
if (fs)
*FS = qstring(fs, '\0');
- dprintf( ("errorflag=%d\n", errorflag) );
+ dprint( ("errorflag=%d\n", errorflag) );
if (errorflag == 0) {
compile_time = 0;
run(winner);
} else
bracecheck();
- return(errorflag);
+ if(errorflag)
+ exits("error");
+ exits(0);
}
int pgetc(void) /* get 1 character from awk program */
@@ -170,20 +175,20 @@
int c;
for (;;) {
- if (yyin == NULL) {
+ if (yyin == nil) {
if (curpfile >= npfile)
- return EOF;
+ return Beof;
if (strcmp(pfile[curpfile], "-") == 0)
- yyin = stdin;
- else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
+ yyin = &stdin;
+ else if ((yyin = Bopen(pfile[curpfile], OREAD)) == nil)
FATAL("can't open file %s", pfile[curpfile]);
lineno = 1;
}
- if ((c = getc(yyin)) != EOF)
+ if ((c = Bgetc(yyin)) != Beof)
return c;
- if (yyin != stdin)
- fclose(yyin);
- yyin = NULL;
+ if (yyin != &stdin)
+ Bterm(yyin);
+ yyin = nil;
curpfile++;
}
}
@@ -193,5 +198,5 @@
if (npfile > 0)
return pfile[curpfile];
else
- return NULL;
+ return nil;
}
--- a/sys/src/cmd/awk/maketab.c
+++ b/sys/src/cmd/awk/maketab.c
@@ -28,9 +28,9 @@
* it finds the indices in y.tab.h, produced by yacc.
*/
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
#include "awk.h"
#include "y.tab.h"
@@ -39,7 +39,7 @@
char *name;
char *pname;
} proc[] = {
- { PROGRAM, "program", NULL },
+ { PROGRAM, "program", nil },
{ BOR, "boolop", " || " },
{ AND, "boolop", " && " },
{ NOT, "boolop", " !" },
@@ -49,13 +49,13 @@
{ LT, "relop", " < " },
{ GE, "relop", " >= " },
{ GT, "relop", " > " },
- { ARRAY, "array", NULL },
+ { ARRAY, "array", nil },
{ INDIRECT, "indirect", "$(" },
{ SUBSTR, "substr", "substr" },
{ SUB, "sub", "sub" },
{ GSUB, "gsub", "gsub" },
{ INDEX, "sindex", "sindex" },
- { SPRINTF, "awksprintf", "sprintf " },
+ { SPRINTF, "awksprintf", "sprintf" },
{ ADD, "arith", " + " },
{ MINUS, "arith", " - " },
{ MULT, "arith", " * " },
@@ -68,8 +68,8 @@
{ PREDECR, "incrdecr", "--" },
{ POSTDECR, "incrdecr", "--" },
{ CAT, "cat", " " },
- { PASTAT, "pastat", NULL },
- { PASTAT2, "dopa2", NULL },
+ { PASTAT, "pastat", nil },
+ { PASTAT2, "dopa2", nil },
{ MATCH, "matchop", " ~ " },
{ NOTMATCH, "matchop", " !~ " },
{ MATCHFCN, "matchop", "matchop" },
@@ -110,59 +110,62 @@
char *table[SIZE];
char *names[SIZE];
-int main(int argc, char *argv[])
+void main(int, char**)
{
struct xx *p;
- int i, n, tok;
- char c;
- FILE *fp;
- char buf[200], name[200], def[200];
+ int i, tok;
+ Biobuf *fp;
+ char *buf, *toks[3];
- printf("#include <stdio.h>\n");
- printf("#include \"awk.h\"\n");
- printf("#include \"y.tab.h\"\n\n");
+ print("#include <u.h>\n");
+ print("#include <libc.h>\n");
+ print("#include <bio.h>\n");
+ print("#include \"awk.h\"\n");
+ print("#include \"y.tab.h\"\n\n");
for (i = SIZE; --i >= 0; )
names[i] = "";
- if ((fp = fopen("y.tab.h", "r")) == NULL) {
- fprintf(stderr, "maketab can't open y.tab.h!\n");
- exit(1);
+ if ((fp = Bopen("y.tab.h", OREAD)) == nil) {
+ fprint(2, "maketab can't open y.tab.h!\n");
+ exits("can't open y.tab.h");
}
- printf("static char *printname[%d] = {\n", SIZE);
+ print("static char *printname[%d] = {\n", SIZE);
i = 0;
- while (fgets(buf, sizeof buf, fp) != NULL) {
- n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok);
- if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */
+ while ((buf = Brdline(fp, '\n')) != nil) {
+ buf[Blinelen(fp)-1] = '\0';
+ tokenize(buf, toks, 3);
+ if (toks[0] == nil || strcmp("#define", toks[0]) != 0) /* not a valid #define */
continue;
+ tok = strtol(toks[2], nil, 10);
if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
- fprintf(stderr, "maketab funny token %d %s\n", tok, buf);
- exit(1);
+ fprint(2, "maketab funny token %d %s\n", tok, buf);
+ exits("funny token");
}
- names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1);
- strcpy(names[tok-FIRSTTOKEN], name);
- printf("\t(char *) \"%s\",\t/* %d */\n", name, tok);
+ names[tok-FIRSTTOKEN] = (char *) malloc(strlen(toks[1])+1);
+ strcpy(names[tok-FIRSTTOKEN], toks[1]);
+ print("\t(char *) \"%s\",\t/* %d */\n", toks[1], tok);
i++;
}
- printf("};\n\n");
+ print("};\n\n");
for (p=proc; p->token!=0; p++)
table[p->token-FIRSTTOKEN] = p->name;
- printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
+ print("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
for (i=0; i<SIZE; i++)
if (table[i]==0)
- printf("\tnullproc,\t/* %s */\n", names[i]);
+ print("\tnullproc,\t/* %s */\n", names[i]);
else
- printf("\t%s,\t/* %s */\n", table[i], names[i]);
- printf("};\n\n");
+ print("\t%s,\t/* %s */\n", table[i], names[i]);
+ print("};\n\n");
- printf("char *tokname(int n)\n"); /* print a tokname() function */
- printf("{\n");
- printf(" static char buf[100];\n\n");
- printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
- printf(" sprintf(buf, \"token %%d\", n);\n");
- printf(" return buf;\n");
- printf(" }\n");
- printf(" return printname[n-FIRSTTOKEN];\n");
- printf("}\n");
- return 0;
+ print("char *tokname(int n)\n"); /* print a tokname() function */
+ print("{\n");
+ print(" static char buf[100];\n\n");
+ print(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
+ print(" sprint(buf, \"token %%d\", n);\n");
+ print(" return buf;\n");
+ print(" }\n");
+ print(" return printname[n-FIRSTTOKEN];\n");
+ print("}\n");
+ exits(0);
}
--- a/sys/src/cmd/awk/mkfile
+++ b/sys/src/cmd/awk/mkfile
@@ -6,6 +6,7 @@
main.$O\
parse.$O\
proctab.$O\
+ popen.$O\
tran.$O\
lib.$O\
run.$O\
@@ -28,11 +29,6 @@
${TARG:%=/386/bin/%}\
</sys/src/cmd/mkone
-CFLAGS=-c -D_REGEXP_EXTENSION -D_RESEARCH_SOURCE -D_BSD_EXTENSION -DUTF
-YFLAGS=-S -d -v
-CC=pcc
-LD=pcc
-cpuobjtype=`{sed -n 's/^O=//p' /$cputype/mkfile}
y.tab.h awkgram.c: $YFILES
$YACC -o awkgram.c $YFLAGS $prereq
@@ -43,10 +39,10 @@
nuke:V:
rm -f *.[$OS] [$OS].out [$OS].maketab y.tab.? y.debug y.output awkgram.c proctab.c $TARG
-proctab.c: $cpuobjtype.maketab
- ./$cpuobjtype.maketab >proctab.c
+proctab.c: $O.maketab
+ ./$O.maketab >proctab.c
-$cpuobjtype.maketab: y.tab.h maketab.c
+$O.maketab: y.tab.h maketab.c
objtype=$cputype
mk maketab.$cputype
--- a/sys/src/cmd/awk/parse.c
+++ b/sys/src/cmd/awk/parse.c
@@ -22,10 +22,9 @@
THIS SOFTWARE.
****************************************************************/
-#define DEBUG
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
#include "awk.h"
#include "y.tab.h"
@@ -34,9 +33,9 @@
Node *x;
x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *));
- if (x == NULL)
+ if (x == nil)
FATAL("out of space in nodealloc");
- x->nnext = NULL;
+ x->nnext = nil;
x->lineno = lineno;
return(x);
}
@@ -220,11 +219,11 @@
if (errorflag) /* don't link things that are wrong */
return a;
- if (a == NULL)
+ if (a == nil)
return(b);
- else if (b == NULL)
+ else if (b == nil)
return(a);
- for (c = a; c->nnext != NULL; c = c->nnext)
+ for (c = a; c->nnext != nil; c = c->nnext)
;
c->nnext = b;
return(a);
@@ -245,7 +244,7 @@
for (p = vl; p; p = p->nnext)
n++;
v->fval = n;
- dprintf( ("defining func %s (%d args)\n", v->nval, n) );
+ dprint( ("defining func %s (%d args)\n", v->nval, n) );
}
int isarg(char *s) /* is s in argument list for current function? */
@@ -262,7 +261,7 @@
int ptoi(void *p) /* convert pointer to integer */
{
- return (int) (long) p; /* swearing that p fits, of course */
+ return (int) (vlong) p; /* swearing that p fits, of course */
}
Node *itonp(int i) /* and vice versa */
--- /dev/null
+++ b/sys/src/cmd/awk/popen.c
@@ -1,0 +1,91 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "awk.h"
+
+#define MAXFORKS 20
+#define NSYSFILE 3
+#define tst(a,b) (mode == OREAD? (b) : (a))
+#define RDR 0
+#define WTR 1
+
+struct a_fork {
+ short done;
+ short fd;
+ int pid;
+ char status[128];
+};
+static struct a_fork the_fork[MAXFORKS];
+
+Biobuf*
+popen(char *cmd, int mode)
+{
+ int p[2];
+ int myside, hisside, pid;
+ int i, ind;
+
+ for (ind = 0; ind < MAXFORKS; ind++)
+ if (the_fork[ind].pid == 0)
+ break;
+ if (ind == MAXFORKS)
+ return nil;
+ if(pipe(p) < 0)
+ return nil;
+ myside = tst(p[WTR], p[RDR]);
+ hisside = tst(p[RDR], p[WTR]);
+ switch (pid = fork()) {
+ case -1:
+ return nil;
+ case 0:
+ /* myside and hisside reverse roles in child */
+ close(myside);
+ dup(hisside, tst(0, 1));
+ for (i=NSYSFILE; i<FOPEN_MAX; i++)
+ close(i);
+ execl("/bin/rc", "rc", "-c", cmd, nil);
+ exits("exec failed");
+ default:
+ the_fork[ind].pid = pid;
+ the_fork[ind].fd = myside;
+ the_fork[ind].done = 0;
+ close(hisside);
+ return(Bfdopen(myside, mode));
+ }
+}
+
+int
+pclose(Biobuf *ptr)
+{
+ int f, r, ind;
+ Waitmsg *status;
+
+ f = Bfildes(ptr);
+ Bterm(ptr);
+ for (ind = 0; ind < MAXFORKS; ind++)
+ if (the_fork[ind].fd == f && the_fork[ind].pid != 0)
+ break;
+ if (ind == MAXFORKS)
+ return -1;
+ if (!the_fork[ind].done) {
+ do {
+ if((status = wait()) == nil)
+ r = -1;
+ else
+ r = status->pid;
+ for (f = 0; f < MAXFORKS; f++) {
+ if (r == the_fork[f].pid) {
+ the_fork[f].done = 1;
+ strecpy(the_fork[f].status, the_fork[f].status+512, status->msg);
+ break;
+ }
+ }
+ free(status);
+ } while(r != the_fork[ind].pid && r != -1);
+ if(r == -1)
+ strcpy(the_fork[ind].status, "No loved ones to wait for");
+ }
+ the_fork[ind].pid = 0;
+ if(the_fork[ind].status[0] != '\0')
+ return 1;
+ return 0;
+}
--- a/sys/src/cmd/awk/proto.h
+++ b/sys/src/cmd/awk/proto.h
@@ -44,7 +44,6 @@
extern int match(void *, char *, char *);
extern int pmatch(void *, char *, char *);
extern int nematch(void *, char *, char *);
-extern int countposn(char *, int);
extern void overflow(void);
extern int pgetc(void);
@@ -100,7 +99,7 @@
extern void growfldtab(int n);
extern int getrec(char **, int *, int);
extern void nextfile(void);
-extern int readrec(char **buf, int *bufsize, FILE *inf);
+extern int readrec(char **buf, int *bufsize, Biobuf *inf);
extern char *getargv(int);
extern void setclvar(char *);
extern void fldbld(void);
@@ -110,7 +109,7 @@
extern void recbld(void);
extern Cell *fieldadr(int);
extern void yyerror(char *);
-extern void fpecatch(int);
+extern int handler(void*, char*);
extern void bracecheck(void);
extern void bcheck2(int, int, int);
extern void SYNTAX(char *, ...);
@@ -165,13 +164,13 @@
extern Cell *bltin(Node **, int);
extern Cell *printstat(Node **, int);
extern Cell *nullproc(Node **, int);
-extern FILE *redirect(int, Node *);
-extern FILE *openfile(int, char *);
-extern char *filename(FILE *);
+extern Biobuf *redirect(int, Node *);
+extern Biobuf *openfile(int, char *);
+extern char *filename(Biobuf *);
extern Cell *closefile(Node **, int);
extern void closeall(void);
extern Cell *sub(Node **, int);
extern Cell *gsub(Node **, int);
-extern FILE *popen(const char *, const char *);
-extern int pclose(FILE *);
+extern Biobuf *popen(char *, int);
+extern int pclose(Biobuf *);
--- a/sys/src/cmd/awk/re.c
+++ b/sys/src/cmd/awk/re.c
@@ -22,18 +22,13 @@
THIS SOFTWARE.
****************************************************************/
-
-#define DEBUG
-#include <stdio.h>
+#include <u.h>
+#include <libc.h>
#include <ctype.h>
-#include <setjmp.h>
-#include <math.h>
-#include <string.h>
-#include <stdlib.h>
-#include <time.h>
+#include <bio.h>
+#include <regexp.h>
#include "awk.h"
#include "y.tab.h"
-#include "regexp.h"
/* This file provides the interface between the main body of
* awk and the pattern matching package. It preprocesses
@@ -198,11 +193,11 @@
{
Resub m;
- m.s.sp = start;
- m.e.ep = 0;
+ m.sp = start;
+ m.ep = 0;
if (regexec((Reprog *) p, (char *) s, &m, 1)) {
- patbeg = m.s.sp;
- patlen = m.e.ep-m.s.sp;
+ patbeg = m.sp;
+ patlen = m.ep-m.sp;
return 1;
}
patlen = -1;
@@ -250,7 +245,7 @@
{
char *p = *s;
char *t = *to;
- wchar_t c;
+ Rune c;
switch(c = *p++) {
case 't':
@@ -273,8 +268,8 @@
*t++ = '\\';
if (c == 'x') { /* hexadecimal goo follows */
c = hexstr(&p);
- if (t < end-MB_CUR_MAX)
- t += wctomb(t, c);
+ if (t < end-UTFmax)
+ t += runelen(c);
else overflow();
*to = t;
*s = p;
@@ -293,21 +288,6 @@
*t++ = c;
*s = p;
*to = t;
-}
- /* count rune positions */
-int
-countposn(char *s, int n)
-{
- int i, j;
- char *end;
-
- for (i = 0, end = s+n; *s && s < end; i++){
- j = mblen(s, n);
- if(j <= 0)
- j = 1;
- s += j;
- }
- return(i);
}
/* pattern package error handler */
--- a/sys/src/cmd/awk/run.c
+++ b/sys/src/cmd/awk/run.c
@@ -22,43 +22,13 @@
THIS SOFTWARE.
****************************************************************/
-#define DEBUG
-#include <stdio.h>
+#include <u.h>
+#include <libc.h>
#include <ctype.h>
-#include <setjmp.h>
-#include <math.h>
-#include <string.h>
-#include <stdlib.h>
-#include <time.h>
-#include <utf.h>
+#include <bio.h>
#include "awk.h"
#include "y.tab.h"
-#define tempfree(x) if (istemp(x)) tfree(x); else
-
-/*
-#undef tempfree
-
-void tempfree(Cell *p) {
- if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
- WARNING("bad csub %d in Cell %d %s",
- p->csub, p->ctype, p->sval);
- }
- if (istemp(p))
- tfree(p);
-}
-*/
-
-#ifdef _NFILE
-#ifndef FOPEN_MAX
-#define FOPEN_MAX _NFILE
-#endif
-#endif
-
-#ifndef FOPEN_MAX
-#define FOPEN_MAX 40 /* max number of open files */
-#endif
-
#ifndef RAND_MAX
#define RAND_MAX 32767 /* all that ansi guarantees */
#endif
@@ -66,7 +36,7 @@
jmp_buf env;
extern int pairstack[];
-Node *winner = NULL; /* root of parse tree */
+Node *winner = nil; /* root of parse tree */
Cell *tmps; /* free temporary cells for execution */
static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM };
@@ -87,8 +57,43 @@
Cell *jret = &retcell;
static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE };
-Node *curnode = NULL; /* the node being executed, for debugging */
+Node *curnode = nil; /* the node being executed, for debugging */
+int
+system(const char *s)
+{
+ char status[512], *statfld[5];
+ int w, pid;
+ char cmd[30], *oty;
+
+ oty = getenv("cputype");
+ if(!oty)
+ return -1;
+ if(!s)
+ return 1; /* a command interpreter is available */
+ pid = fork();
+ snprint(cmd, sizeof cmd, "/%s/bin/ape/sh", oty);
+ if(pid == 0) {
+ execl(cmd, "sh", "-c", s, nil);
+ exits("exec");
+ }
+ if(pid < 0){
+ return -1;
+ }
+ for(;;) {
+ w = await(status, sizeof(status) - 1);
+ if(w == -1)
+ return -1;
+ tokenize(status, statfld, nelem(statfld));
+ if(strtol(statfld[0], nil, 0) == pid)
+ break;
+ }
+
+ if(*statfld[4] != '\0')
+ return 1;
+ return 0;
+}
+
/* buffer memory management */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
char *whatrtn)
@@ -110,7 +115,7 @@
if (rminlen)
minlen += quantum - rminlen;
tbuf = (char *) realloc(*pbuf, minlen);
- if (tbuf == NULL) {
+ if (tbuf == nil) {
if (whatrtn)
FATAL("out of memory in %s", whatrtn);
return 0;
@@ -139,7 +144,7 @@
Cell *x;
Node *a;
- if (u == NULL)
+ if (u == nil)
return(True);
for (a = u; ; a = a->nnext) {
curnode = a;
@@ -164,14 +169,15 @@
return(x);
if (isjump(x))
return(x);
- if (a->nnext == NULL)
+ if (a->nnext == nil)
return(x);
- tempfree(x);
+ if(istemp(x))
+ tfree(x);
}
}
-Cell *program(Node **a, int n) /* execute an awk program */
+Cell *program(Node **a, int) /* execute an awk program */
{ /* a[0] = BEGIN, a[1] = body, a[2] = END */
Cell *x;
@@ -183,7 +189,8 @@
return(True);
if (isjump(x))
FATAL("illegal break, continue, next or nextfile from BEGIN");
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
}
if (a[1] || a[2])
while (getrec(&record, &recsize, 1) > 0) {
@@ -190,7 +197,8 @@
x = execute(a[1]);
if (isexit(x))
break;
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
}
ex:
if (setjmp(env) != 0) /* handles exit within END */
@@ -199,7 +207,8 @@
x = execute(a[2]);
if (isbreak(x) || isnext(x) || iscont(x))
FATAL("illegal break, continue, next or nextfile from END");
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
}
ex1:
return(True);
@@ -214,11 +223,11 @@
#define NARGS 50 /* max args in a call */
-struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
+struct Frame *frame = nil; /* base of stack frames; dynamically allocated */
int nframe = 0; /* number of frames allocated */
-struct Frame *fp = NULL; /* frame pointer. bottom level unused */
+struct Frame *fp = nil; /* frame pointer. bottom level unused */
-Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
+Cell *call(Node **a, int) /* function call. very kludgy and fragile */
{
static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE };
int i, ncall, ndef;
@@ -231,25 +240,25 @@
s = fcn->nval;
if (!isfcn(fcn))
FATAL("calling undefined function %s", s);
- if (frame == NULL) {
+ if (frame == nil) {
fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame));
- if (frame == NULL)
+ if (frame == nil)
FATAL("out of space for stack frames calling %s", s);
}
- for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
+ for (ncall = 0, x = a[1]; x != nil; x = x->nnext) /* args in call */
ncall++;
ndef = (int) fcn->fval; /* args in defn */
- dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) );
+ dprint( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) );
if (ncall > ndef)
WARNING("function %s called with %d args, uses only %d",
s, ncall, ndef);
if (ncall + ndef > NARGS)
FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
- for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
- dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) );
+ for (i = 0, x = a[1]; x != nil; i++, x = x->nnext) { /* get call args */
+ dprint( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) );
y = execute(x);
oargs[i] = y;
- dprintf( ("args[%d]: %s %f <%s>, t=%o\n",
+ dprint( ("args[%d]: %s %f <%s>, t=%o\n",
i, y->nval, y->fval, isarr(y) ? "(array)" : y->sval, y->tval) );
if (isfcn(y))
FATAL("can't use function %s as argument in %s", y->nval, s);
@@ -257,7 +266,8 @@
args[i] = y; /* arrays by ref */
else
args[i] = copycell(y);
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
}
for ( ; i < ndef; i++) { /* add null args for ones not provided */
args[i] = gettemp();
@@ -268,7 +278,7 @@
int dfp = fp - frame; /* old index */
frame = (struct Frame *)
realloc((char *) frame, (nframe += 100) * sizeof(struct Frame));
- if (frame == NULL)
+ if (frame == nil)
FATAL("out of space for stack frames in %s", s);
fp = frame + dfp;
}
@@ -277,9 +287,9 @@
fp->nargs = ndef; /* number defined with (excess are locals) */
fp->retval = gettemp();
- dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) );
+ dprint( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) );
y = execute((Node *)(fcn->sval)); /* execute body */
- dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) );
+ dprint( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) );
for (i = 0; i < ndef; i++) {
Cell *t = fp->args[i];
@@ -288,25 +298,30 @@
if (i >= ncall) {
freesymtab(t);
t->csub = CTEMP;
- tempfree(t);
+ if (istemp(t))
+ tfree(t);
} else {
oargs[i]->tval = t->tval;
oargs[i]->tval &= ~(STR|NUM|DONTFREE);
oargs[i]->sval = t->sval;
- tempfree(t);
+ if (istemp(t))
+ tfree(t);
}
}
} else if (t != y) { /* kludge to prevent freeing twice */
t->csub = CTEMP;
- tempfree(t);
+ if (istemp(t))
+ tfree(t);
}
}
- tempfree(fcn);
+ if (istemp(fcn))
+ tfree(fcn);
if (isexit(y) || isnext(y) || isnextfile(y))
return y;
- tempfree(y); /* this can free twice! */
+ if (istemp(y))
+ tfree(y); /* this can free twice! */
z = fp->retval; /* return value */
- dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
+ dprint( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
fp--;
return(z);
}
@@ -318,7 +333,7 @@
y = gettemp();
y->csub = CCOPY; /* prevents freeing until call is over */
y->nval = x->nval; /* BUG? */
- y->sval = x->sval ? tostring(x->sval) : NULL;
+ y->sval = x->sval ? tostring(x->sval) : nil;
y->fval = x->fval;
y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */
/* is DONTFREE right? */
@@ -329,7 +344,7 @@
{
n = ptoi(a[0]); /* argument number, counting from 0 */
- dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) );
+ dprint( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) );
if (n+1 > fp->nargs)
FATAL("argument #%d of function %s was not supplied",
n+1, fp->fcncell->nval);
@@ -342,14 +357,15 @@
switch (n) {
case EXIT:
- if (a[0] != NULL) {
+ if (a[0] != nil) {
y = execute(a[0]);
errorflag = (int) getfval(y);
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
}
longjmp(env, 1);
case RETURN:
- if (a[0] != NULL) {
+ if (a[0] != nil) {
y = execute(a[0]);
if ((y->tval & (STR|NUM)) == (STR|NUM)) {
setsval(fp->retval, getsval(y));
@@ -362,7 +378,8 @@
setfval(fp->retval, getfval(y));
else /* can't happen */
FATAL("bad type variable %d", y->tval);
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
}
return(jret);
case NEXT:
@@ -384,33 +401,35 @@
{ /* a[0] is variable, a[1] is operator, a[2] is filename */
Cell *r, *x;
extern Cell **fldtab;
- FILE *fp;
+ Biobuf *fp;
char *buf;
int bufsize = recsize;
int mode;
- if ((buf = (char *) malloc(bufsize)) == NULL)
+ if ((buf = (char *) malloc(bufsize)) == nil)
FATAL("out of memory in getline");
- fflush(stdout); /* in case someone is waiting for a prompt */
+ Bflush(&stdout); /* in case someone is waiting for a prompt */
r = gettemp();
- if (a[1] != NULL) { /* getline < file */
+ if (a[1] != nil) { /* getline < file */
x = execute(a[2]); /* filename */
mode = ptoi(a[1]);
if (mode == '|') /* input pipe */
mode = LE; /* arbitrary flag */
fp = openfile(mode, getsval(x));
- tempfree(x);
- if (fp == NULL)
+ if (istemp(x))
+ tfree(x);
+ if (fp == nil)
n = -1;
else
n = readrec(&buf, &bufsize, fp);
if (n <= 0) {
;
- } else if (a[0] != NULL) { /* getline var <file */
+ } else if (a[0] != nil) { /* getline var <file */
x = execute(a[0]);
setsval(x, buf);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
} else { /* getline <file */
setsval(fldtab[0], buf);
if (is_number(fldtab[0]->sval)) {
@@ -419,13 +438,14 @@
}
}
} else { /* bare getline; use current input */
- if (a[0] == NULL) /* getline */
+ if (a[0] == nil) /* getline */
n = getrec(&record, &recsize, 1);
else { /* getline var */
n = getrec(&buf, &bufsize, 0);
x = execute(a[0]);
setsval(x, buf);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
}
}
setfval(r, (Awkfloat) n);
@@ -433,7 +453,7 @@
return r;
}
-Cell *getnf(Node **a, int n) /* get NF */
+Cell *getnf(Node **a, int) /* get NF */
{
if (donefld == 0)
fldbld();
@@ -440,7 +460,7 @@
return (Cell *) a[0];
}
-Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
+Cell *array(Node **a, int) /* a[0] is symtab, a[1] is list of subscripts */
{
Cell *x, *y, *z;
char *s;
@@ -449,7 +469,7 @@
int bufsz = recsize;
int nsub = strlen(*SUBSEP);
- if ((buf = (char *) malloc(bufsz)) == NULL)
+ if ((buf = (char *) malloc(bufsz)) == nil)
FATAL("out of memory in array");
x = execute(a[0]); /* Cell* for symbol table */
@@ -462,10 +482,11 @@
strcat(buf, s);
if (np->nnext)
strcat(buf, *SUBSEP);
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
}
if (!isarr(x)) {
- dprintf( ("making %s into an array\n", x->nval) );
+ dprint( ("making %s into an array\n", x->nval) );
if (freeable(x))
xfree(x->sval);
x->tval &= ~(STR|NUM|DONTFREE);
@@ -475,12 +496,13 @@
z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
z->ctype = OCELL;
z->csub = CVAR;
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
free(buf);
return(z);
}
-Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
+Cell *awkdelete(Node **a, int) /* a[0] is symtab, a[1] is list of subscripts */
{
Cell *x, *y;
Node *np;
@@ -498,7 +520,7 @@
} else {
int bufsz = recsize;
char *buf;
- if ((buf = (char *) malloc(bufsz)) == NULL)
+ if ((buf = (char *) malloc(bufsz)) == nil)
FATAL("out of memory in adelete");
buf[0] = 0;
for (np = a[1]; np; np = np->nnext) {
@@ -509,16 +531,18 @@
strcat(buf, s);
if (np->nnext)
strcat(buf, *SUBSEP);
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
}
freeelem(x, buf);
free(buf);
}
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
return True;
}
-Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
+Cell *intest(Node **a, int) /* a[0] is index (list), a[1] is symtab */
{
Cell *x, *ap, *k;
Node *p;
@@ -529,7 +553,7 @@
ap = execute(a[1]); /* array name */
if (!isarr(ap)) {
- dprintf( ("making %s into an array\n", ap->nval) );
+ dprint( ("making %s into an array\n", ap->nval) );
if (freeable(ap))
xfree(ap->sval);
ap->tval &= ~(STR|NUM|DONTFREE);
@@ -536,7 +560,7 @@
ap->tval |= ARR;
ap->sval = (char *) makesymtab(NSYMTAB);
}
- if ((buf = (char *) malloc(bufsz)) == NULL) {
+ if ((buf = (char *) malloc(bufsz)) == nil) {
FATAL("out of memory in intest");
}
buf[0] = 0;
@@ -546,14 +570,16 @@
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, 0))
FATAL("out of memory deleting %s[%s...]", x->nval, buf);
strcat(buf, s);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
if (p->nnext)
strcat(buf, *SUBSEP);
}
k = lookup(buf, (Array *) ap->sval);
- tempfree(ap);
+ if (istemp(ap))
+ tfree(ap);
free(buf);
- if (k == NULL)
+ if (k == nil)
return(False);
else
return(True);
@@ -575,19 +601,21 @@
y = execute(a[2]); /* a[2] = regular expr */
t = getsval(y);
p = compre(t);
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
}
if (n == MATCHFCN)
i = pmatch(p, s, s);
else
i = match(p, s, s);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
if (n == MATCHFCN) {
- int start = countposn(s, patbeg-s)+1;
+ int start = utfnlen(s, patbeg-s)+1;
if (patlen < 0)
start = 0;
setfval(rstartloc, (Awkfloat) start);
- setfval(rlengthloc, (Awkfloat) countposn(patbeg, patlen));
+ setfval(rlengthloc, (Awkfloat) utfnlen(patbeg, patlen));
x = gettemp();
x->tval = NUM;
x->fval = start;
@@ -606,13 +634,15 @@
x = execute(a[0]);
i = istrue(x);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
switch (n) {
case BOR:
if (i) return(True);
y = execute(a[1]);
i = istrue(y);
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
if (i) return(True);
else return(False);
case AND:
@@ -619,7 +649,8 @@
if ( !i ) return(False);
y = execute(a[1]);
i = istrue(y);
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
if (i) return(True);
else return(False);
case NOT:
@@ -645,8 +676,10 @@
} else {
i = strcmp(getsval(x), getsval(y));
}
- tempfree(x);
- tempfree(y);
+ if (istemp(x))
+ tfree(x);
+ if (istemp(y))
+ tfree(y);
switch (n) {
case LT: if (i<0) return(True);
else return(False);
@@ -669,7 +702,7 @@
void tfree(Cell *a) /* free a tempcell */
{
if (freeable(a)) {
- dprintf( ("freeing %s %s %o\n", a->nval, a->sval, a->tval) );
+ dprint( ("freeing %s %s %o\n", a->nval, a->sval, a->tval) );
xfree(a->sval);
}
if (a == tmps)
@@ -696,7 +729,7 @@
return(x);
}
-Cell *indirect(Node **a, int n) /* $( a[0] ) */
+Cell *indirect(Node **a, int) /* $( a[0] ) */
{
Cell *x;
int m;
@@ -707,7 +740,8 @@
if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */
FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
/* BUG: can x->nval ever be null??? */
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = fieldadr(m);
x->ctype = OCELL; /* BUG? why are these needed? */
x->csub = CFLD;
@@ -714,9 +748,10 @@
return(x);
}
-Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
+Cell *substr(Node **a, int) /* substr(a[0], a[1], a[2]) */
{
int k, m, n;
+ Rune r;
char *s, *p;
int temp;
Cell *x, *y, *z = 0;
@@ -726,12 +761,16 @@
if (a[2] != 0)
z = execute(a[2]);
s = getsval(x);
- k = countposn(s, strlen(s)) + 1;
+ k = utfnlen(s, strlen(s)) + 1;
if (k <= 1) {
- tempfree(x);
- tempfree(y);
- if (a[2] != 0)
- tempfree(z);
+ if (istemp(x))
+ tfree(x);
+ if (istemp(y))
+ tfree(y);
+ if (a[2] != 0) {
+ if (istemp(z))
+ tfree(z);
+ }
x = gettemp();
setsval(x, "");
return(x);
@@ -741,10 +780,12 @@
m = 1;
else if (m > k)
m = k;
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
if (a[2] != 0) {
n = (int) getfval(z);
- tempfree(z);
+ if (istemp(z))
+ tfree(z);
} else
n = k - 1;
if (n < 0)
@@ -751,21 +792,22 @@
n = 0;
else if (n > k - m)
n = k - m;
- dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
+ dprint( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
y = gettemp();
while (*s && --m)
- s += mblen(s, k);
- for (p = s; *p && n--; p += mblen(p, k))
+ s += chartorune(&r, s);
+ for (p = s; *p && n--; p += chartorune(&r, p))
;
temp = *p; /* with thanks to John Linderman */
*p = '\0';
setsval(y, s);
*p = temp;
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
return(y);
}
-Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
+Cell *sindex(Node **a, int) /* index(a[0], a[1]) */
{
Cell *x, *y, *z;
char *s1, *s2, *p1, *p2, *q;
@@ -781,12 +823,14 @@
for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++)
;
if (*p2 == '\0') {
- v = (Awkfloat) countposn(s1, p1-s1) + 1; /* origin 1 */
+ v = (Awkfloat) utfnlen(s1, p1-s1) + 1; /* origin 1 */
break;
}
}
- tempfree(x);
- tempfree(y);
+ if (istemp(x))
+ tfree(x);
+ if (istemp(y))
+ tfree(y);
setfval(z, v);
return(z);
}
@@ -798,7 +842,7 @@
char *fmt;
char *p, *t, *os;
Cell *x;
- int flag = 0, n;
+ int flag, n;
int fmtwd; /* format width */
int fmtsz = recsize;
char *buf = *pbuf;
@@ -806,7 +850,7 @@
os = s;
p = buf;
- if ((fmt = (char *) malloc(fmtsz)) == NULL)
+ if ((fmt = (char *) malloc(fmtsz)) == nil)
FATAL("out of memory in format()");
while (*s) {
adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format");
@@ -832,12 +876,13 @@
if (*s == '*') {
x = execute(a);
a = a->nnext;
- sprintf(t-1, "%d", fmtwd=(int) getfval(x));
+ sprint(t-1, "%d", fmtwd=(int) getfval(x));
if (fmtwd < 0)
fmtwd = -fmtwd;
adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
t = fmt + strlen(fmt);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
}
}
*t = '\0';
@@ -856,8 +901,14 @@
*t = 'd';
*++t = '\0';
break;
- case 'o': case 'x': case 'X': case 'u':
+ case 'u':
flag = *(s-1) == 'l' ? 2 : 3;
+ *t++ = 'u';
+ *t++ = 'd';
+ *t = '\0';
+ break;
+ case 'o': case 'x': case 'X':
+ flag = *(s-1) == 'l' ? 2 : 3;
break;
case 's':
flag = 4;
@@ -870,7 +921,7 @@
flag = 0;
break;
}
- if (a == NULL)
+ if (a == nil)
FATAL("not enough args in printf(%s)", os);
x = execute(a);
a = a->nnext;
@@ -879,7 +930,7 @@
n = fmtwd;
adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format");
switch (flag) {
- case 0: sprintf(p, "%s", fmt); /* unknown, so dump it too */
+ case 0: sprint(p, "%s", fmt); /* unknown, so dump it too */
t = getsval(x);
n = strlen(t);
if (fmtwd > n)
@@ -886,11 +937,11 @@
n = fmtwd;
adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format");
p += strlen(p);
- sprintf(p, "%s", t);
+ sprint(p, "%s", t);
break;
- case 1: sprintf(p, fmt, getfval(x)); break;
- case 2: sprintf(p, fmt, (long) getfval(x)); break;
- case 3: sprintf(p, fmt, (int) getfval(x)); break;
+ case 1: sprint(p, fmt, getfval(x)); break;
+ case 2: sprint(p, fmt, (long) getfval(x)); break;
+ case 3: sprint(p, fmt, (int) getfval(x)); break;
case 4:
t = getsval(x);
n = strlen(t);
@@ -898,21 +949,26 @@
n = fmtwd;
if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, 0))
FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
- sprintf(p, fmt, t);
+ sprint(p, fmt, t);
break;
case 5:
if (isnum(x)) {
- if (getfval(x))
- sprintf(p, fmt, (int) getfval(x));
- else{
+ if (getfval(x)) {
+ *p++ = (uchar)getfval(x);
+ *p = '\0';
+ } else {
*p++ = '\0';
*p = '\0';
}
- } else
- sprintf(p, fmt, getsval(x)[0]);
+ } else {
+ if((*p = getsval(x)[0]) != '\0')
+ p++;
+ *p = '\0';
+ }
break;
}
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
p += strlen(p);
s++;
}
@@ -925,7 +981,7 @@
return p - buf;
}
-Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
+Cell *awksprintf(Node **a, int) /* sprint(a[0]) */
{
Cell *x;
Node *y;
@@ -932,13 +988,14 @@
char *buf;
int bufsz=3*recsize;
- if ((buf = (char *) malloc(bufsz)) == NULL)
- FATAL("out of memory in awksprintf");
+ if ((buf = (char *) malloc(bufsz)) == nil)
+ FATAL("out of memory in awksprint");
y = a[0]->nnext;
x = execute(a[0]);
if (format(&buf, &bufsz, getsval(x), y) == -1)
- FATAL("sprintf string %.30s... too long. can't happen.", buf);
- tempfree(x);
+ FATAL("sprint string %.30s... too long. can't happen.", buf);
+ if (istemp(x))
+ tfree(x);
x = gettemp();
x->sval = buf;
x->tval = STR;
@@ -945,10 +1002,10 @@
return(x);
}
-Cell *awkprintf(Node **a, int n) /* printf */
+Cell *awkprintf(Node **a, int) /* printf */
{ /* a[0] is list of args, starting with format string */
/* a[1] is redirection operator, a[2] is redirection file */
- FILE *fp;
+ Biobuf *fp;
Cell *x;
Node *y;
char *buf;
@@ -955,25 +1012,25 @@
int len;
int bufsz=3*recsize;
- if ((buf = (char *) malloc(bufsz)) == NULL)
+ if ((buf = (char *) malloc(bufsz)) == nil)
FATAL("out of memory in awkprintf");
y = a[0]->nnext;
x = execute(a[0]);
if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
FATAL("printf string %.30s... too long. can't happen.", buf);
- tempfree(x);
- if (a[1] == NULL) {
+ if (istemp(x))
+ tfree(x);
+ if (a[1] == nil) {
/* fputs(buf, stdout); */
- fwrite(buf, len, 1, stdout);
- if (ferror(stdout))
+ if (Bwrite(&stdout, buf, len) < 0)
FATAL("write error on stdout");
+ Bflush(&stdout);
} else {
fp = redirect(ptoi(a[1]), a[2]);
/* fputs(buf, fp); */
- fwrite(buf, len, 1, fp);
- fflush(fp);
- if (ferror(fp))
+ if(Bwrite(fp, buf, len) < 0)
FATAL("write error on %s", filename(fp));
+ Bflush(fp);
}
free(buf);
return(True);
@@ -987,11 +1044,13 @@
x = execute(a[0]);
i = getfval(x);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
if (n != UMINUS) {
y = execute(a[1]);
j = getfval(y);
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
}
z = gettemp();
switch (n) {
@@ -1060,7 +1119,8 @@
z = gettemp();
setfval(z, xf);
setfval(x, xf + k);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
return(z);
}
@@ -1074,8 +1134,8 @@
x = execute(a[0]);
if (n == ASSIGN) { /* ordinary assignment */
if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */
- ; /* leave alone unless it's a field */
- else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
+ goto Free; /* leave alone unless it's a field */
+ if ((y->tval & (STR|NUM)) == (STR|NUM)) {
setsval(x, getsval(y));
x->fval = getfval(y);
x->tval |= NUM;
@@ -1086,7 +1146,9 @@
setfval(x, getfval(y));
else
funnyvar(y, "read value of");
- tempfree(y);
+Free:
+ if (istemp(y))
+ tfree(y);
return(x);
}
xf = getfval(x);
@@ -1122,12 +1184,13 @@
FATAL("illegal assignment operator %d", n);
break;
}
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
setfval(x, xf);
return(x);
}
-Cell *cat(Node **a, int q) /* a[0] cat a[1] */
+Cell *cat(Node **a, int) /* a[0] cat a[1] */
{
Cell *x, *y, *z;
int n1, n2;
@@ -1140,20 +1203,22 @@
n1 = strlen(x->sval);
n2 = strlen(y->sval);
s = (char *) malloc(n1 + n2 + 1);
- if (s == NULL)
+ if (s == nil)
FATAL("out of space concatenating %.15s... and %.15s...",
x->sval, y->sval);
strcpy(s, x->sval);
strcpy(s+n1, y->sval);
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
z = gettemp();
z->sval = s;
z->tval = STR;
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
return(z);
}
-Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
+Cell *pastat(Node **a, int) /* a[0] { a[1] } */
{
Cell *x;
@@ -1162,7 +1227,8 @@
else {
x = execute(a[0]);
if (istrue(x)) {
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = execute(a[1]);
}
}
@@ -1169,7 +1235,7 @@
return x;
}
-Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
+Cell *dopa2(Node **a, int) /* a[0], a[1] { a[2] } */
{
Cell *x;
int pair;
@@ -1179,13 +1245,15 @@
x = execute(a[0]);
if (istrue(x))
pairstack[pair] = 1;
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
}
if (pairstack[pair] == 1) {
x = execute(a[1]);
if (istrue(x))
pairstack[pair] = 0;
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = execute(a[2]);
return(x);
}
@@ -1192,12 +1260,12 @@
return(False);
}
-Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
+Cell *split(Node **a, int) /* split(a[0], a[1], a[2]); a[3] is type */
{
Cell *x = 0, *y, *ap;
char *s, *t, *fs = 0;
char temp, num[50];
- int n, nb, sep, tempstat, arg3type;
+ int n, nb, sep, arg3type;
y = execute(a[0]); /* source string */
s = getsval(y);
@@ -1217,7 +1285,7 @@
y->tval |= DONTFREE; /* split(a[x], a); */
freesymtab(ap);
y->tval = n;
- dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs) );
+ dprint( ("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs) );
ap->tval &= ~STR;
ap->tval |= ARR;
ap->sval = (char *) makesymtab(NSYMTAB);
@@ -1234,7 +1302,7 @@
if (nematch(p,s,t)) {
do {
n++;
- sprintf(num, "%d", n);
+ sprint(num, "%d", n);
temp = *patbeg;
*patbeg = '\0';
if (is_number(t))
@@ -1245,7 +1313,7 @@
t = patbeg + patlen;
if (t[-1] == 0 || *t == 0) {
n++;
- sprintf(num, "%d", n);
+ sprint(num, "%d", n);
setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
goto spdone;
}
@@ -1252,13 +1320,14 @@
} while (nematch(p,s,t));
}
n++;
- sprintf(num, "%d", n);
+ sprint(num, "%d", n);
if (is_number(t))
setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
else
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
spdone:
- p = NULL;
+ p = nil;
+ USED(p);
} else if (sep == ' ') {
for (n = 0; ; ) {
while (*s == ' ' || *s == '\t' || *s == '\n')
@@ -1272,7 +1341,7 @@
while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0');
temp = *s;
*s = '\0';
- sprintf(num, "%d", n);
+ sprint(num, "%d", n);
if (is_number(t))
setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
else
@@ -1287,7 +1356,7 @@
char buf[UTFmax+1];
n++;
- snprintf(num, sizeof num, "%d", n);
+ snprint(num, sizeof num, "%d", n);
nb = chartorune(&r, s);
memmove(buf, s, nb);
buf[nb] = '\0';
@@ -1304,7 +1373,7 @@
s++;
temp = *s;
*s = '\0';
- sprintf(num, "%d", n);
+ sprint(num, "%d", n);
if (is_number(t))
setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
else
@@ -1314,10 +1383,13 @@
break;
}
}
- tempfree(ap);
- tempfree(y);
+ if (istemp(ap))
+ tfree(ap);
+ if (istemp(y))
+ tfree(y);
if (a[2] != 0 && arg3type == STRING)
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = gettemp();
x->tval = NUM;
x->fval = n;
@@ -1324,37 +1396,41 @@
return(x);
}
-Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
+Cell *condexpr(Node **a, int) /* a[0] ? a[1] : a[2] */
{
Cell *x;
x = execute(a[0]);
if (istrue(x)) {
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = execute(a[1]);
} else {
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = execute(a[2]);
}
return(x);
}
-Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
+Cell *ifstat(Node **a, int) /* if (a[0]) a[1]; else a[2] */
{
Cell *x;
x = execute(a[0]);
if (istrue(x)) {
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = execute(a[1]);
} else if (a[2] != 0) {
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = execute(a[2]);
}
return(x);
}
-Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
+Cell *whilestat(Node **a, int) /* while (a[0]) a[1] */
{
Cell *x;
@@ -1362,7 +1438,8 @@
x = execute(a[0]);
if (!istrue(x))
return(x);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = execute(a[1]);
if (isbreak(x)) {
x = True;
@@ -1370,11 +1447,12 @@
}
if (isnext(x) || isexit(x) || isret(x))
return(x);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
}
}
-Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
+Cell *dostat(Node **a, int) /* do a[0]; while(a[1]) */
{
Cell *x;
@@ -1384,25 +1462,30 @@
return True;
if (isnext(x) || isnextfile(x) || isexit(x) || isret(x))
return(x);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = execute(a[1]);
if (!istrue(x))
return(x);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
}
}
-Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
+Cell *forstat(Node **a, int) /* for (a[0]; a[1]; a[2]) a[3] */
{
Cell *x;
x = execute(a[0]);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
for (;;) {
if (a[1]!=0) {
x = execute(a[1]);
- if (!istrue(x)) return(x);
- else tempfree(x);
+ if (!istrue(x))
+ return(x);
+ else if (istemp(x))
+ tfree(x);
}
x = execute(a[3]);
if (isbreak(x)) /* turn off break */
@@ -1409,13 +1492,15 @@
return True;
if (isnext(x) || isexit(x) || isret(x))
return(x);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = execute(a[2]);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
}
}
-Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
+Cell *instat(Node **a, int) /* for (a[0] in a[1]) a[2] */
{
Cell *x, *vp, *arrayp, *cp, *ncp;
Array *tp;
@@ -1427,36 +1512,40 @@
return True;
}
tp = (Array *) arrayp->sval;
- tempfree(arrayp);
+ if (istemp(arrayp))
+ tfree(arrayp);
for (i = 0; i < tp->size; i++) { /* this routine knows too much */
- for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
+ for (cp = tp->tab[i]; cp != nil; cp = ncp) {
setsval(vp, cp->nval);
ncp = cp->cnext;
x = execute(a[2]);
if (isbreak(x)) {
- tempfree(vp);
+ if (istemp(vp))
+ tfree(vp);
return True;
}
if (isnext(x) || isexit(x) || isret(x)) {
- tempfree(vp);
+ if (istemp(vp))
+ tfree(vp);
return(x);
}
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
}
}
return True;
}
-Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
+Cell *bltin(Node **a, int) /* builtin functions. a[0] is type, a[1] is arg list */
{
Cell *x, *y;
Awkfloat u;
int t;
- wchar_t wc;
+ Rune wc;
char *p, *buf;
char mbc[50];
Node *nextarg;
- FILE *fp;
+ Biobuf *fp;
void flush_all(void);
t = ptoi(a[0]);
@@ -1465,10 +1554,10 @@
switch (t) {
case FLENGTH:
if (isarr(x))
- u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
+ u = ((Array *) x->sval)->nelemt; /* GROT. should be function*/
else {
p = getsval(x);
- u = (Awkfloat) countposn(p, strlen(p));
+ u = (Awkfloat) utfnlen(p, strlen(p));
}
break;
case FLOG:
@@ -1490,12 +1579,13 @@
} else {
y = execute(a[1]->nnext);
u = atan2(getfval(x), getfval(y));
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
nextarg = nextarg->nnext;
}
break;
case FSYSTEM:
- fflush(stdout); /* in case something is buffered already */
+ Bflush(&stdout); /* in case something is buffered already */
u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */
break;
case FRAND:
@@ -1504,7 +1594,7 @@
break;
case FSRAND:
if (isrec(x)) /* no argument provided */
- u = time((time_t *)0);
+ u = time(nil);
else
u = getfval(x);
srand((unsigned int) u);
@@ -1521,7 +1611,8 @@
if (isupper(*p))
*p = tolower(*p);
}
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = gettemp();
setsval(x, buf);
free(buf);
@@ -1530,15 +1621,16 @@
if (isrec(x) || strlen(getsval(x)) == 0) {
flush_all(); /* fflush() or fflush("") -> all */
u = 0;
- } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
- u = EOF;
+ } else if ((fp = openfile(FFLUSH, getsval(x))) == nil)
+ u = Beof;
else
- u = fflush(fp);
+ u = Bflush(fp);
break;
case FUTF:
wc = (int)getfval(x);
- mbc[wctomb(mbc, wc)] = 0;
- tempfree(x);
+ mbc[runetochar(mbc, &wc)] = 0;
+ if (istemp(x))
+ tfree(x);
x = gettemp();
setsval(x, mbc);
return x;
@@ -1546,7 +1638,8 @@
FATAL("illegal function type %d", t);
break;
}
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
x = gettemp();
setfval(x, u);
if (nextarg != 0) {
@@ -1557,45 +1650,43 @@
return(x);
}
-Cell *printstat(Node **a, int n) /* print a[0] */
+Cell *printstat(Node **a, int) /* print a[0] */
{
int r;
Node *x;
Cell *y;
- FILE *fp;
+ Biobuf *fp;
if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */
- fp = stdout;
+ fp = &stdout;
else
fp = redirect(ptoi(a[1]), a[2]);
- for (x = a[0]; x != NULL; x = x->nnext) {
+ for (x = a[0]; x != nil; x = x->nnext) {
y = execute(x);
- fputs(getsval(y), fp);
- tempfree(y);
- if (x->nnext == NULL)
- r = fputs(*ORS, fp);
+ Bwrite(fp, getsval(y), strlen(getsval(y)));
+ if (istemp(y))
+ tfree(y);
+ if (x->nnext == nil)
+ r = Bprint(fp, "%s", *ORS);
else
- r = fputs(*OFS, fp);
- if (r == EOF)
+ r = Bprint(fp, "%s", *OFS);
+ if (r < 0)
FATAL("write error on %s", filename(fp));
}
- if (a[1] != 0)
- if (fflush(fp) == EOF)
- FATAL("write error on %s", filename(fp));
+ if (Bflush(fp) < 0)
+ FATAL("write error on %s", filename(fp));
return(True);
}
-Cell *nullproc(Node **a, int n)
+Cell *nullproc(Node **, int)
{
- n = n;
- a = a;
return 0;
}
-FILE *redirect(int a, Node *b) /* set up all i/o redirections */
+Biobuf *redirect(int a, Node *b) /* set up all i/o redirections */
{
- FILE *fp;
+ Biobuf *fp;
Cell *x;
char *fname;
@@ -1602,34 +1693,35 @@
x = execute(b);
fname = getsval(x);
fp = openfile(a, fname);
- if (fp == NULL)
+ if (fp == nil)
FATAL("can't open file %s", fname);
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
return fp;
}
struct files {
- FILE *fp;
+ Biobuf *fp;
char *fname;
int mode; /* '|', 'a', 'w' => LE/LT, GT */
} files[FOPEN_MAX] ={
- { NULL, "/dev/stdin", LT }, /* watch out: don't free this! */
- { NULL, "/dev/stdout", GT },
- { NULL, "/dev/stderr", GT }
+ { nil, "/dev/stdin", LT }, /* watch out: don't free this! */
+ { nil, "/dev/stdout", GT },
+ { nil, "/dev/stderr", GT }
};
void stdinit(void) /* in case stdin, etc., are not constants */
{
- files[0].fp = stdin;
- files[1].fp = stdout;
- files[2].fp = stderr;
+ files[0].fp = &stdin;
+ files[1].fp = &stdout;
+ files[2].fp = &stderr;
}
-FILE *openfile(int a, char *us)
+Biobuf *openfile(int a, char *us)
{
char *s = us;
int i, m;
- FILE *fp = 0;
+ Biobuf *fp = nil;
if (*s == '\0')
FATAL("null file name in print or getline");
@@ -1641,7 +1733,7 @@
return files[i].fp;
}
if (a == FFLUSH) /* didn't find it, so don't create it! */
- return NULL;
+ return nil;
for (i=0; i < FOPEN_MAX; i++)
if (files[i].fp == 0)
@@ -1648,22 +1740,23 @@
break;
if (i >= FOPEN_MAX)
FATAL("%s makes too many open files", s);
- fflush(stdout); /* force a semblance of order */
+ Bflush(&stdout); /* force a semblance of order */
m = a;
if (a == GT) {
- fp = fopen(s, "w");
+ fp = Bopen(s, OWRITE);
} else if (a == APPEND) {
- fp = fopen(s, "a");
+ fp = Bopen(s, OWRITE);
+ Bseek(fp, 0LL, 2);
m = GT; /* so can mix > and >> */
} else if (a == '|') { /* output pipe */
- fp = popen(s, "w");
+ fp = popen(s, OWRITE);
} else if (a == LE) { /* input pipe */
- fp = popen(s, "r");
+ fp = popen(s, OREAD);
} else if (a == LT) { /* getline <file */
- fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
+ fp = strcmp(s, "-") == 0 ? &stdin : Bopen(s, OREAD); /* "-" is stdin */
} else /* can't happen */
FATAL("illegal redirection %d", a);
- if (fp != NULL) {
+ if (fp != nil) {
files[i].fname = tostring(s);
files[i].fp = fp;
files[i].mode = m;
@@ -1671,7 +1764,7 @@
return fp;
}
-char *filename(FILE *fp)
+char *filename(Biobuf *fp)
{
int i;
@@ -1681,30 +1774,28 @@
return "???";
}
-Cell *closefile(Node **a, int n)
+Cell *closefile(Node **a, int)
{
Cell *x;
int i, stat;
- n = n;
x = execute(a[0]);
getsval(x);
for (i = 0; i < FOPEN_MAX; i++)
if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) {
- if (ferror(files[i].fp))
- WARNING( "i/o error occurred on %s", files[i].fname );
if (files[i].mode == '|' || files[i].mode == LE)
stat = pclose(files[i].fp);
else
- stat = fclose(files[i].fp);
- if (stat == EOF)
+ stat = Bterm(files[i].fp);
+ if (stat == Beof)
WARNING( "i/o error occurred closing %s", files[i].fname );
if (i > 2) /* don't do /dev/std... */
xfree(files[i].fname);
- files[i].fname = NULL; /* watch out for ref thru this */
- files[i].fp = NULL;
+ files[i].fname = nil; /* watch out for ref thru this */
+ files[i].fp = nil;
}
- tempfree(x);
+ if (istemp(x))
+ tfree(x);
return(True);
}
@@ -1714,13 +1805,11 @@
for (i = 0; i < FOPEN_MAX; i++)
if (files[i].fp) {
- if (ferror(files[i].fp))
- WARNING( "i/o error occurred on %s", files[i].fname );
if (files[i].mode == '|' || files[i].mode == LE)
stat = pclose(files[i].fp);
else
- stat = fclose(files[i].fp);
- if (stat == EOF)
+ stat = Bterm(files[i].fp);
+ if (stat < -1)
WARNING( "i/o error occurred while closing %s", files[i].fname );
}
}
@@ -1731,12 +1820,12 @@
for (i = 0; i < FOPEN_MAX; i++)
if (files[i].fp)
- fflush(files[i].fp);
+ Bflush(files[i].fp);
}
void backsub(char **pb_ptr, char **sptr_ptr);
-Cell *sub(Node **a, int nnn) /* substitute command */
+Cell *sub(Node **a, int) /* substitute command */
{
char *sptr, *pb, *q;
Cell *x, *y, *result;
@@ -1744,7 +1833,7 @@
void *p;
int bufsz = recsize;
- if ((buf = (char *) malloc(bufsz)) == NULL)
+ if ((buf = (char *) malloc(bufsz)) == nil)
FATAL("out of memory in sub");
x = execute(a[3]); /* target string */
t = getsval(x);
@@ -1753,7 +1842,8 @@
else {
y = execute(a[1]);
p = compre(getsval(y));
- tempfree(y);
+ if (istemp(y))
+ tfree(y);
}
y = execute(a[2]); /* replacement string */
result = False;
@@ -1790,22 +1880,24 @@
setsval(x, buf); /* BUG: should be able to avoid copy */
result = True;;
}
- tempfree(x);
- tempfree(y);
+ if (istemp(x))
+ tfree(x);
+ if (istemp(y))
+ tfree(y);
free(buf);
return result;
}
-Cell *gsub(Node **a, int nnn) /* global substitute */
+Cell *gsub(Node **a, int) /* global substitute */
{
Cell *x, *y;
- char *rptr, *sptr, *t, *pb, *c;
+ char *rptr, *sptr, *t, *pb, *c, *s;
char *buf;
void *p;
int mflag, num;
int bufsz = recsize;
- if ((buf = (char *)malloc(bufsz)) == NULL)
+ if ((buf = (char *)malloc(bufsz)) == nil)
FATAL("out of memory in gsub");
mflag = 0; /* if mflag == 0, can replace empty string */
num = 0;
@@ -1815,8 +1907,10 @@
p = (void *) a[1]; /* regular expression */
else {
y = execute(a[1]);
- p = compre(getsval(y));
- tempfree(y);
+ s = getsval(y);
+ p = compre(s);
+ if (istemp(y))
+ tfree(y);
}
y = execute(a[2]); /* replacement string */
if (pmatch(p, t, c)) {
@@ -1886,8 +1980,10 @@
*pb = '\0';
setsval(x, buf); /* BUG: should be able to avoid copy + free */
}
- tempfree(x);
- tempfree(y);
+ if (istemp(x))
+ tfree(x);
+ if (istemp(y))
+ tfree(y);
x = gettemp();
x->tval = NUM;
x->fval = num;
--- a/sys/src/cmd/awk/tran.c
+++ b/sys/src/cmd/awk/tran.c
@@ -22,12 +22,10 @@
THIS SOFTWARE.
****************************************************************/
-#define DEBUG
-#include <stdio.h>
-#include <math.h>
+#include <u.h>
+#include <libc.h>
#include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
+#include <bio.h>
#include "awk.h"
#include "y.tab.h"
@@ -46,7 +44,7 @@
Awkfloat *NR; /* number of current record */
Awkfloat *FNR; /* number of current record in current file */
char **FILENAME; /* current filename argument */
-Awkfloat *ARGC; /* number of arguments from command line */
+Awkfloat *AARGC; /* number of arguments from command line */
char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH; /* length of same */
@@ -101,12 +99,12 @@
int i;
char temp[50];
- ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
+ AARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
cp->sval = (char *) ARGVtab;
for (i = 0; i < ac; i++) {
- sprintf(temp, "%d", i);
+ sprint(temp, "%d", i);
if (is_number(*av))
setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
else
@@ -124,7 +122,7 @@
ENVtab = makesymtab(NSYMTAB);
cp->sval = (char *) ENVtab;
for ( ; *envp; envp++) {
- if ((p = strchr(*envp, '=')) == NULL)
+ if ((p = strchr(*envp, '=')) == nil)
continue;
*p++ = 0; /* split into two strings at = */
if (is_number(p))
@@ -142,9 +140,9 @@
ap = (Array *) malloc(sizeof(Array));
tp = (Cell **) calloc(n, sizeof(Cell *));
- if (ap == NULL || tp == NULL)
+ if (ap == nil || tp == nil)
FATAL("out of space in makesymtab");
- ap->nelem = 0;
+ ap->nelemt = 0;
ap->size = n;
ap->tab = tp;
return(ap);
@@ -159,10 +157,10 @@
if (!isarr(ap))
return;
tp = (Array *) ap->sval;
- if (tp == NULL)
+ if (tp == nil)
return;
for (i = 0; i < tp->size; i++) {
- for (cp = tp->tab[i]; cp != NULL; cp = temp) {
+ for (cp = tp->tab[i]; cp != nil; cp = temp) {
xfree(cp->nval);
if (freeable(cp))
xfree(cp->sval);
@@ -178,14 +176,14 @@
void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */
{
Array *tp;
- Cell *p, *prev = NULL;
+ Cell *p, *prev = nil;
int h;
tp = (Array *) ap->sval;
h = hash(s, tp->size);
- for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
+ for (p = tp->tab[h]; p != nil; prev = p, p = p->cnext)
if (strcmp(s, p->nval) == 0) {
- if (prev == NULL) /* 1st one */
+ if (prev == nil) /* 1st one */
tp->tab[h] = p->cnext;
else /* middle somewhere */
prev->cnext = p->cnext;
@@ -193,7 +191,7 @@
xfree(p->sval);
free(p->nval);
free(p);
- tp->nelem--;
+ tp->nelemt--;
return;
}
}
@@ -203,13 +201,13 @@
int h;
Cell *p;
- if (n != NULL && (p = lookup(n, tp)) != NULL) {
- dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
+ if (n != nil && (p = lookup(n, tp)) != nil) {
+ dprint( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
p, p->nval, p->sval, p->fval, p->tval) );
return(p);
}
p = (Cell *) malloc(sizeof(Cell));
- if (p == NULL)
+ if (p == nil)
FATAL("out of space for symbol table at %s", n);
p->nval = tostring(n);
p->sval = s ? tostring(s) : tostring("");
@@ -217,13 +215,13 @@
p->tval = t;
p->csub = CUNK;
p->ctype = OCELL;
- tp->nelem++;
- if (tp->nelem > FULLTAB * tp->size)
+ tp->nelemt++;
+ if (tp->nelemt > FULLTAB * tp->size)
rehash(tp);
h = hash(n, tp->size);
p->cnext = tp->tab[h];
tp->tab[h] = p;
- dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
+ dprint( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
p, p->nval, p->sval, p->fval, p->tval) );
return(p);
}
@@ -244,7 +242,7 @@
nsz = GROWTAB * tp->size;
np = (Cell **) calloc(nsz, sizeof(Cell *));
- if (np == NULL) /* can't do it, but can keep running. */
+ if (np == nil) /* can't do it, but can keep running. */
return; /* someone else will run out later. */
for (i = 0; i < tp->size; i++) {
for (cp = tp->tab[i]; cp; cp = op) {
@@ -265,10 +263,10 @@
int h;
h = hash(s, tp->size);
- for (p = tp->tab[h]; p != NULL; p = p->cnext)
+ for (p = tp->tab[h]; p != nil; p = p->cnext)
if (strcmp(s, p->nval) == 0)
return(p); /* found it */
- return(NULL); /* not found */
+ return(nil); /* not found */
}
Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
@@ -282,7 +280,7 @@
fldno = atoi(vp->nval);
if (fldno > *NF)
newfld(fldno);
- dprintf( ("setting field %d to %g\n", fldno, f) );
+ dprint( ("setting field %d to %g\n", fldno, f) );
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
@@ -291,7 +289,7 @@
xfree(vp->sval); /* free any previous string */
vp->tval &= ~STR; /* mark string invalid */
vp->tval |= NUM; /* mark number ok */
- dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
+ dprint( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
return vp->fval = f;
}
@@ -310,7 +308,7 @@
char *t;
int fldno;
- dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
+ dprint( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to");
if (isfld(vp)) {
@@ -318,7 +316,7 @@
fldno = atoi(vp->nval);
if (fldno > *NF)
newfld(fldno);
- dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
+ dprint( ("setting field %d to %s (%p)\n", fldno, s, s) );
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
@@ -329,7 +327,7 @@
if (freeable(vp))
xfree(vp->sval);
vp->tval &= ~DONTFREE;
- dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
+ dprint( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
return(vp->sval = t);
}
@@ -346,7 +344,7 @@
if (is_number(vp->sval) && !(vp->tval&CON))
vp->tval |= NUM; /* make NUM only sparingly */
}
- dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
+ dprint( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
return(vp->fval);
}
@@ -365,14 +363,14 @@
if (freeable(vp))
xfree(vp->sval);
if (modf(vp->fval, &dtemp) == 0) /* it's integral */
- sprintf(s, "%.30g", vp->fval);
+ sprint(s, "%.30g", vp->fval);
else
- sprintf(s, *CONVFMT, vp->fval);
+ sprint(s, *CONVFMT, vp->fval);
vp->sval = tostring(s);
vp->tval &= ~DONTFREE;
vp->tval |= STR;
}
- dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
+ dprint( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
return(vp->sval);
}
@@ -381,7 +379,7 @@
char *p;
p = (char *) malloc(strlen(s)+1);
- if (p == NULL)
+ if (p == nil)
FATAL("out of space in tostring on %s", s);
strcpy(p, s);
return(p);
@@ -393,7 +391,7 @@
int c, n;
char *buf, *bp;
- if ((buf = (char *) malloc(strlen(s)+3)) == NULL)
+ if ((buf = (char *) malloc(strlen(s)+3)) == nil)
FATAL( "out of space in qstring(%s)", s);
for (bp = buf; (c = *s) != delim; s++) {
if (c == '\n')
@@ -429,6 +427,6 @@
}
}
}
- *bp++ = 0;
+ *bp = 0;
return buf;
}
--- a/sys/src/cmd/upas/bayes/addhash.c
+++ b/sys/src/cmd/upas/bayes/addhash.c
@@ -1,7 +1,7 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
-#include <regexp.h>
+#include "regexp.h"
#include "hash.h"
Hash hash;
--- a/sys/src/cmd/upas/bayes/bayes.c
+++ b/sys/src/cmd/upas/bayes/bayes.c
@@ -1,7 +1,7 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
-#include <regexp.h>
+#include "regexp.h"
#include "hash.h"
enum
--- a/sys/src/cmd/upas/bayes/dfa.c
+++ b/sys/src/cmd/upas/bayes/dfa.c
@@ -2,8 +2,8 @@
#include <libc.h>
#include <bin.h>
#include <bio.h>
-#include <regexp.h>
-#include "/sys/src/libregexp/regcomp.h"
+#include "regexp.h"
+#include "regcomp.h"
#include "dfa.h"
void rdump(Reprog*);
--- a/sys/src/cmd/upas/bayes/dump.c
+++ b/sys/src/cmd/upas/bayes/dump.c
@@ -1,7 +1,7 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
-#include <regexp.h>
+#include "regexp.h"
#include "/sys/src/libregexp/regcomp.h"
#include "dfa.h"
--- a/sys/src/cmd/upas/bayes/msgtok.c
+++ b/sys/src/cmd/upas/bayes/msgtok.c
@@ -7,7 +7,7 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
-#include <regexp.h>
+#include "regexp.h"
#include <ctype.h>
#include "dfa.h"
--- a/sys/src/cmd/upas/bayes/regcomp.c
+++ b/sys/src/cmd/upas/bayes/regcomp.c
@@ -4,7 +4,7 @@
#include <u.h>
#include <libc.h>
#include "regexp.h"
-#include "/sys/src/libregexp/regcomp.h"
+#include "regcomp.h"
#define TRUE 1
#define FALSE 0
--- /dev/null
+++ b/sys/src/cmd/upas/bayes/regcomp.h
@@ -1,0 +1,63 @@
+/*
+ * substitution list
+ */
+#define NSUBEXP 32
+typedef struct Resublist Resublist;
+struct Resublist
+{
+ Resub m[NSUBEXP];
+};
+
+/*
+ * Actions and Tokens (Reinst types)
+ *
+ * 02xx are operators, value == precedence
+ * 03xx are tokens, i.e. operands for operators
+ */
+#define RUNE 0177
+#define OPERATOR 0200 /* Bitmask of all operators */
+#define START 0200 /* Start, used for marker on stack */
+#define RBRA 0201 /* Right bracket, ) */
+#define LBRA 0202 /* Left bracket, ( */
+#define OR 0203 /* Alternation, | */
+#define CAT 0204 /* Concatentation, implicit operator */
+#define STAR 0205 /* Closure, * */
+#define PLUS 0206 /* a+ == aa* */
+#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */
+#define ANY 0300 /* Any character except newline, . */
+#define ANYNL 0301 /* Any character including newline, . */
+#define NOP 0302 /* No operation, internal use only */
+#define BOL 0303 /* Beginning of line, ^ */
+#define EOL 0304 /* End of line, $ */
+#define CCLASS 0305 /* Character class, [] */
+#define NCCLASS 0306 /* Negated character class, [] */
+#define END 0377 /* Terminate: match found */
+
+/*
+ * regexec execution lists
+ */
+#define LISTSIZE 10
+#define BIGLISTSIZE (25*LISTSIZE)
+typedef struct Relist Relist;
+struct Relist
+{
+ Reinst* inst; /* Reinstruction of the thread */
+ Resublist se; /* matched subexpressions in this thread */
+};
+typedef struct Reljunk Reljunk;
+struct Reljunk
+{
+ Relist* relist[2];
+ Relist* reliste[2];
+ int starttype;
+ Rune startchar;
+ char* starts;
+ char* eol;
+ Rune* rstarts;
+ Rune* reol;
+};
+
+extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*);
+extern void _renewmatch(Resub*, int, Resublist*);
+extern Relist* _renewemptythread(Relist*, Reinst*, int, char*);
+extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*);
--- a/sys/src/cmd/upas/bayes/regen.c
+++ b/sys/src/cmd/upas/bayes/regen.c
@@ -1,7 +1,7 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
-#include <regexp.h>
+#include "regexp.h"
#include "dfa.h"
/***
--- /dev/null
+++ b/sys/src/cmd/upas/bayes/regexp.h
@@ -1,0 +1,66 @@
+#pragma src "/sys/src/oldlibregexp"
+#pragma lib "oldlibregexp.a"
+
+typedef struct Resub Resub;
+typedef struct Reclass Reclass;
+typedef struct Reinst Reinst;
+typedef struct Reprog Reprog;
+
+/*
+ * Sub expression matches
+ */
+struct Resub{
+ union
+ {
+ char *sp;
+ Rune *rsp;
+ };
+ union
+ {
+ char *ep;
+ Rune *rep;
+ };
+};
+
+/*
+ * character class, each pair of rune's defines a range
+ */
+struct Reclass{
+ Rune *end;
+ Rune spans[64];
+};
+
+/*
+ * Machine instructions
+ */
+struct Reinst{
+ int type;
+ union {
+ Reclass *cp; /* class pointer */
+ Rune r; /* character */
+ int subid; /* sub-expression id for RBRA and LBRA */
+ Reinst *right; /* right child of OR */
+ };
+ union { /* regexp relies on these two being in the same union */
+ Reinst *left; /* left child of OR */
+ Reinst *next; /* next instruction for CAT & LBRA */
+ };
+};
+
+/*
+ * Reprogram definition
+ */
+struct Reprog{
+ Reinst *startinst; /* start pc */
+ Reclass class[16]; /* .data */
+ Reinst firstinst[5]; /* .text */
+};
+
+extern Reprog *regcomp(char*);
+extern Reprog *regcomplit(char*);
+extern Reprog *regcompnl(char*);
+extern void regerror(char*);
+extern int regexec(Reprog*, char*, Resub*, int);
+extern void regsub(char*, char*, int, Resub*, int);
+extern int rregexec(Reprog*, Rune*, Resub*, int);
+extern void rregsub(Rune*, Rune*, int, Resub*, int);
--- a/sys/src/libregexp/mkfile
+++ b/sys/src/libregexp/mkfile
@@ -6,12 +6,12 @@
regerror.$O\
regexec.$O\
regsub.$O\
- regaux.$O\
rregexec.$O\
rregsub.$O\
+ regprint.$O\
HFILES=/sys/include/regexp.h\
- regcomp.h\
+ regimpl.h\
UPDATE=\
mkfile\
@@ -20,9 +20,3 @@
${LIB:/$objtype/%=/386/%}\
</sys/src/cmd/mksyslib
-
-test: test.$O $OFILES
- $LD -o test $prereq
-
-test2: test2.$O $OFILES
- $LD -o test2 $prereq
--- a/sys/src/libregexp/regaux.c
+++ /dev/null
@@ -1,113 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-
-/*
- * save a new match in mp
- */
-extern void
-_renewmatch(Resub *mp, int ms, Resublist *sp)
-{
- int i;
-
- if(mp==0 || ms<=0)
- return;
- if(mp[0].sp==0 || sp->m[0].sp<mp[0].sp ||
- (sp->m[0].sp==mp[0].sp && sp->m[0].ep>mp[0].ep)){
- for(i=0; i<ms && i<NSUBEXP; i++)
- mp[i] = sp->m[i];
- for(; i<ms; i++)
- mp[i].sp = mp[i].ep = 0;
- }
-}
-
-/*
- * Note optimization in _renewthread:
- * *lp must be pending when _renewthread called; if *l has been looked
- * at already, the optimization is a bug.
- */
-extern Relist*
-_renewthread(Relist *lp, /* _relist to add to */
- Reinst *ip, /* instruction to add */
- int ms,
- Resublist *sep) /* pointers to subexpressions */
-{
- Relist *p;
-
- for(p=lp; p->inst; p++){
- if(p->inst == ip){
- if(sep->m[0].sp < p->se.m[0].sp){
- if(ms > 1)
- p->se = *sep;
- else
- p->se.m[0] = sep->m[0];
- }
- return 0;
- }
- }
- p->inst = ip;
- if(ms > 1)
- p->se = *sep;
- else
- p->se.m[0] = sep->m[0];
- (++p)->inst = 0;
- return p;
-}
-
-/*
- * same as renewthread, but called with
- * initial empty start pointer.
- */
-extern Relist*
-_renewemptythread(Relist *lp, /* _relist to add to */
- Reinst *ip, /* instruction to add */
- int ms,
- char *sp) /* pointers to subexpressions */
-{
- Relist *p;
-
- for(p=lp; p->inst; p++){
- if(p->inst == ip){
- if(sp < p->se.m[0].sp) {
- if(ms > 1)
- memset(&p->se, 0, sizeof(p->se));
- p->se.m[0].sp = sp;
- }
- return 0;
- }
- }
- p->inst = ip;
- if(ms > 1)
- memset(&p->se, 0, sizeof(p->se));
- p->se.m[0].sp = sp;
- (++p)->inst = 0;
- return p;
-}
-
-extern Relist*
-_rrenewemptythread(Relist *lp, /* _relist to add to */
- Reinst *ip, /* instruction to add */
- int ms,
- Rune *rsp) /* pointers to subexpressions */
-{
- Relist *p;
-
- for(p=lp; p->inst; p++){
- if(p->inst == ip){
- if(rsp < p->se.m[0].rsp) {
- if(ms > 1)
- memset(&p->se, 0, sizeof(p->se));
- p->se.m[0].rsp = rsp;
- }
- return 0;
- }
- }
- p->inst = ip;
- if(ms > 1)
- memset(&p->se, 0, sizeof(p->se));
- p->se.m[0].rsp = rsp;
- (++p)->inst = 0;
- return p;
-}
--- a/sys/src/libregexp/regcomp.c
+++ b/sys/src/libregexp/regcomp.c
@@ -1,567 +1,576 @@
#include <u.h>
#include <libc.h>
-#include "regexp.h"
-#include "regcomp.h"
+#include <regexp.h>
+#include "regimpl.h"
-#define TRUE 1
-#define FALSE 0
+int instrcnt[] = {
+ [TANY] 2,
+ [TBOL] 1,
+ [TCAT] 0,
+ [TCLASS] 1,
+ [TEOL] 1,
+ [TNOTNL] 1,
+ [TOR] 2,
+ [TPLUS] 1,
+ [TQUES] 1,
+ [TRUNE] 1,
+ [TSTAR] 2,
+ [TSUB] 2
+};
-/*
- * Parser Information
- */
-typedef
-struct Node
+static Renode*
+node(Parselex *plex, int op, Renode *l, Renode *r)
{
- Reinst* first;
- Reinst* last;
-}Node;
+ Renode *n;
-/* max character classes per program is nelem(reprog->class) */
-static Reprog *reprog;
-
-/* max rune ranges per character class is nelem(classp->spans)/2 */
-#define NCCRUNE nelem(classp->spans)
-
-#define NSTACK 20
-static Node andstack[NSTACK];
-static Node *andp;
-static int atorstack[NSTACK];
-static int* atorp;
-static int cursubid; /* id of current subexpression */
-static int subidstack[NSTACK]; /* parallel to atorstack */
-static int* subidp;
-static int lastwasand; /* Last token was operand */
-static int nbra;
-static char* exprp; /* pointer to next character in source expression */
-static int lexdone;
-static int nclass;
-static Reclass*classp;
-static Reinst* freep;
-static int errors;
-static Rune yyrune; /* last lex'd rune */
-static Reclass*yyclassp; /* last lex'd class */
-
-/* predeclared crap */
-static void operator(int);
-static void pushand(Reinst*, Reinst*);
-static void pushator(int);
-static void evaluntil(int);
-static int bldcclass(void);
-
-static jmp_buf regkaboom;
-
-static void
-rcerror(char *s)
-{
- errors++;
- regerror(s);
- longjmp(regkaboom, 1);
+ plex->instrs += instrcnt[op];
+ n = plex->next++;
+ n->op = op;
+ n->left = l;
+ n->right = r;
+ return n;
}
-static Reinst*
-newinst(int t)
+static Renode*
+e3(Parselex *plex)
{
- freep->type = t;
- freep->left = 0;
- freep->right = 0;
- return freep++;
+ char error[128];
+ Renode *n;
+
+ switch(lex(plex)) {
+ case LANY:
+ return node(plex, TANY, nil, nil);
+ case LBOL:
+ return node(plex, TBOL, nil, nil);
+ case LEOL:
+ return node(plex, TEOL, nil, nil);
+ case LRUNE:
+ n = node(plex, TRUNE, nil, nil);
+ n->r = plex->rune;
+ return n;
+ case LCLASS:
+ if(plex->nc)
+ return buildclassn(plex);
+ return buildclass(plex);
+ case LLPAR:
+ n = e0(plex);
+ n = node(plex, TSUB, n, nil);
+ if(lex(plex) != LRPAR) {
+ snprint(error, sizeof(error), "regexp %s: no matching parenthesis", plex->orig);
+ regerror(error);
+ longjmp(plex->exitenv, 1);
+ }
+ return n;
+ default:
+ if(plex->rune)
+ snprint(error, sizeof(error), "regexp %s: syntax error: %C", plex->orig, plex->rune);
+ else
+ snprint(error, sizeof(error), "regexp %s: parsing error", plex->orig);
+ regerror(error);
+ longjmp(plex->exitenv, 1);
+ }
+ return nil;
}
-static void
-operand(int t)
+static Renode*
+e2(Parselex *plex)
{
- Reinst *i;
+ Renode *n;
- if(lastwasand)
- operator(CAT); /* catenate is implicit */
- i = newinst(t);
-
- if(t == CCLASS || t == NCCLASS)
- i->cp = yyclassp;
- if(t == RUNE)
- i->r = yyrune;
-
- pushand(i, i);
- lastwasand = TRUE;
+ n = e3(plex);
+ if(lex(plex) == LREP) {
+ switch(plex->rune) {
+ case L'*':
+ return node(plex, TSTAR, n, nil);
+ case L'+':
+ return node(plex, TPLUS, n, nil);
+ case L'?':
+ return node(plex, TQUES, n, nil);
+ }
+ }
+ plex->peek = 1;
+ return n;
}
-static void
-operator(int t)
+static Renode*
+invert(Renode *n)
{
- if(t==RBRA && --nbra<0)
- rcerror("unmatched right paren");
- if(t==LBRA){
- if(++cursubid >= NSUBEXP)
- rcerror ("too many subexpressions");
- nbra++;
- if(lastwasand)
- operator(CAT);
- } else
- evaluntil(t);
- if(t != RBRA)
- pushator(t);
- lastwasand = FALSE;
- if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
- lastwasand = TRUE; /* these look like operands */
+ Renode *n1;
+
+ if(n->op != TCAT)
+ return n;
+ while(n->left->op == TCAT) {
+ n1 = n->left;
+ n->left = n1->right;
+ n1->right = n;
+ n = n1;
+ }
+ return n;
}
-static void
-regerr2(char *s, int c)
+static Renode*
+e1(Parselex *plex)
{
- char buf[100];
- char *cp = buf;
- while(*s)
- *cp++ = *s++;
- *cp++ = c;
- *cp = '\0';
- rcerror(buf);
+ Renode *n;
+ int sym;
+
+ n = e2(plex);
+ for(;;) {
+ sym = lex(plex);
+ if(sym == LEND || sym == LOR || sym == LRPAR)
+ break;
+ plex->peek = 1;
+ n = node(plex, TCAT, n, e2(plex));
+ }
+ plex->peek = 1;
+ return invert(n);
}
-static void
-cant(char *s)
+static Renode*
+e0(Parselex *plex)
{
- char buf[100];
- strcpy(buf, "can't happen: ");
- strcat(buf, s);
- rcerror(buf);
+ Renode *n;
+
+ n = e1(plex);
+ for(;;) {
+ if(lex(plex) != LOR)
+ break;
+ n = node(plex, TOR, n, e1(plex));
+ }
+ plex->peek = 1;
+ return n;
}
-static void
-pushand(Reinst *f, Reinst *l)
+static Parselex*
+initplex(Parselex *plex, char *regstr, int lit)
{
- if(andp >= &andstack[NSTACK])
- cant("operand stack overflow");
- andp->first = f;
- andp->last = l;
- andp++;
+ plex->getnextr = lit ? getnextrlit : getnextr;
+ plex->rawexp = plex->orig = regstr;
+ plex->sub = 0;
+ plex->instrs = 0;
+ plex->peek = 0;
+ plex->done = 0;
+ return plex;
}
-static void
-pushator(int t)
+static int
+maxthreads(Renode *tree)
{
- if(atorp >= &atorstack[NSTACK])
- cant("operator stack overflow");
- *atorp++ = t;
- *subidp++ = cursubid;
+ tree = tree->left;
+ if(tree->op == TCAT)
+ tree = tree->left;
+ if(tree->op == TBOL)
+ return 2;
+ return -1;
}
-static Node*
-popand(int op)
+static Reprog*
+regcomp1(char *regstr, int nl, int lit)
{
- Reinst *inst;
+ Reprog *reprog;
+ Parselex plex;
+ Renode *parsetr;
+ int regstrlen, maxthr;
- if(andp <= &andstack[0]){
- regerr2("missing operand for ", op);
- inst = newinst(NOP);
- pushand(inst,inst);
+ regstrlen = utflen(regstr);
+ initplex(&plex, regstr, lit);
+ plex.nodes = calloc(sizeof(*plex.nodes), regstrlen*2);
+ if(plex.nodes == nil)
+ return nil;
+ plex.next = plex.nodes;
+
+ if(setjmp(plex.exitenv) != 0) {
+ free(plex.nodes);
+ return nil;
}
- return --andp;
+
+ maxthr = regstrlen;
+ parsetr = node(&plex, TSUB, e0(&plex), nil);
+
+// prtree(parsetr, 0, 1);
+ reprog = malloc(sizeof(Reprog) +
+ sizeof(Reinst) * plex.instrs +
+ sizeof(Rethread) * maxthr);
+ reprog->len = plex.instrs;
+ reprog->nthr = maxthr;
+ reprog->startinst = compile(parsetr, reprog, nl);
+ reprog->threads = (Rethread*)(reprog->startinst + reprog->len);
+ reprog->regstr = regstr;
+
+ free(plex.nodes);
+ return reprog;
}
-static int
-popator(void)
+Reprog*
+regcomp(char *str)
{
- if(atorp <= &atorstack[0])
- cant("operator stack underflow");
- --subidp;
- return *--atorp;
+ return regcomp1(str, 0, 0);
}
-static void
-evaluntil(int pri)
+Reprog*
+regcomplit(char *str)
{
- Node *op1, *op2;
- Reinst *inst1, *inst2;
+ return regcomp1(str, 0, 1);
+}
- while(pri==RBRA || atorp[-1]>=pri){
- switch(popator()){
- default:
- rcerror("unknown operator in evaluntil");
- break;
- case LBRA: /* must have been RBRA */
- op1 = popand('(');
- inst2 = newinst(RBRA);
- inst2->subid = *subidp;
- op1->last->next = inst2;
- inst1 = newinst(LBRA);
- inst1->subid = *subidp;
- inst1->next = op1->first;
- pushand(inst1, inst2);
- return;
- case OR:
- op2 = popand('|');
- op1 = popand('|');
- inst2 = newinst(NOP);
- op2->last->next = inst2;
- op1->last->next = inst2;
- inst1 = newinst(OR);
- inst1->right = op1->first;
- inst1->left = op2->first;
- pushand(inst1, inst2);
- break;
- case CAT:
- op2 = popand(0);
- op1 = popand(0);
- op1->last->next = op2->first;
- pushand(op1->first, op2->last);
- break;
- case STAR:
- op2 = popand('*');
- inst1 = newinst(OR);
- op2->last->next = inst1;
- inst1->right = op2->first;
- pushand(inst1, inst1);
- break;
- case PLUS:
- op2 = popand('+');
- inst1 = newinst(OR);
- op2->last->next = inst1;
- inst1->right = op2->first;
- pushand(op2->first, inst1);
- break;
- case QUEST:
- op2 = popand('?');
- inst1 = newinst(OR);
- inst2 = newinst(NOP);
- inst1->left = inst2;
- inst1->right = op2->first;
- op2->last->next = inst2;
- pushand(inst1, inst2);
- break;
- }
- }
+Reprog*
+regcompnl(char *str)
+{
+ return regcomp1(str, 1, 0);
}
-static Reprog*
-optimize(Reprog *pp)
+static Reinst*
+compile1(Renode *renode, Reinst *reinst, int *sub, int nl)
{
- Reinst *inst, *target;
- int size;
- Reprog *npp;
- Reclass *cl;
- int diff;
+ Reinst *i;
+ int s;
- /*
- * get rid of NOOP chains
- */
- for(inst=pp->firstinst; inst->type!=END; inst++){
- target = inst->next;
- while(target->type == NOP)
- target = target->next;
- inst->next = target;
+Tailcall:
+ if(renode == nil)
+ return reinst;
+ switch(renode->op) {
+ case TCLASS:
+ reinst->op = OCLASS;
+ reinst->r = renode->r;
+ reinst->r1 = renode->r1;
+ reinst->a = reinst + 1 + renode->nclass;
+ renode = renode->left;
+ reinst++;
+ goto Tailcall;
+ case TCAT:
+ reinst = compile1(renode->left, reinst, sub, nl);
+ renode = renode->right;
+ goto Tailcall;
+ case TOR:
+ reinst->op = OSPLIT;
+ reinst->a = reinst + 1;
+ i = compile1(renode->left, reinst->a, sub, nl);
+ reinst->b = i + 1;
+ i->op = OJMP;
+ i->a = compile1(renode->right, reinst->b, sub, nl);
+ return i->a;
+ case TSTAR:
+ reinst->op = OSPLIT;
+ reinst->a = reinst + 1;
+ i = compile1(renode->left, reinst->a, sub, nl);
+ reinst->b = i + 1;
+ i->op = OJMP;
+ i->a = reinst;
+ return reinst->b;
+ case TPLUS:
+ i = reinst;
+ reinst = compile1(renode->left, reinst, sub, nl);
+ reinst->op = OSPLIT;
+ reinst->a = i;
+ reinst->b = reinst + 1;
+ return reinst->b;
+ case TQUES:
+ reinst->op = OSPLIT;
+ reinst->a = reinst + 1;
+ reinst->b = compile1(renode->left, reinst->a, sub, nl);
+ return reinst->b;
+ case TSUB:
+ reinst->op = OSAVE;
+ reinst->sub = s = (*sub)++;
+ reinst = compile1(renode->left, reinst+1, sub, nl);
+ reinst->op = OUNSAVE;
+ reinst->sub = s;
+ return reinst + 1;
+ case TANY:
+ if(nl == 0)
+ reinst++->op = ONOTNL;
+ reinst->op = OANY;
+ return reinst + 1;
+ case TRUNE:
+ reinst->op = ORUNE;
+ reinst->r = renode->r;
+ return reinst + 1;
+ case TNOTNL:
+ reinst->op = ONOTNL;
+ return reinst + 1;
+ case TEOL:
+ reinst->op = OEOL;
+ return reinst + 1;
+ case TBOL:
+ reinst->op = OBOL;
+ return reinst + 1;
}
-
- /*
- * The original allocation is for an area larger than
- * necessary. Reallocate to the actual space used
- * and then relocate the code.
- */
- size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
- npp = realloc(pp, size);
- if(npp==0 || npp==pp)
- return pp;
- diff = (char *)npp - (char *)pp;
- freep = (Reinst *)((char *)freep + diff);
- for(inst=npp->firstinst; inst<freep; inst++){
- switch(inst->type){
- case OR:
- case STAR:
- case PLUS:
- case QUEST:
- *(char **)&inst->right += diff;
- break;
- case CCLASS:
- case NCCLASS:
- *(char **)&inst->right += diff;
- cl = inst->cp;
- *(char **)&cl->end += diff;
- break;
- }
- *(char **)&inst->left += diff;
- }
- *(char **)&npp->startinst += diff;
- return npp;
+ return nil;
}
-#ifdef DEBUG
-static void
-dumpstack(void){
- Node *stk;
- int *ip;
-
- print("operators\n");
- for(ip=atorstack; ip<atorp; ip++)
- print("0%o\n", *ip);
- print("operands\n");
- for(stk=andstack; stk<andp; stk++)
- print("0%o\t0%o\n", stk->first->type, stk->last->type);
-}
-
-static void
-dump(Reprog *pp)
+static Reinst*
+compile(Renode *parsetr, Reprog *reprog, int nl)
{
- Reinst *l;
- Rune *p;
+ Reinst *reinst;
+ int sub;
- l = pp->firstinst;
- do{
- print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
- l->left-pp->firstinst, l->right-pp->firstinst);
- if(l->type == RUNE)
- print("\t%C\n", l->r);
- else if(l->type == CCLASS || l->type == NCCLASS){
- print("\t[");
- if(l->type == NCCLASS)
- print("^");
- for(p = l->cp->spans; p < l->cp->end; p += 2)
- if(p[0] == p[1])
- print("%C", p[0]);
- else
- print("%C-%C", p[0], p[1]);
- print("]\n");
- } else
- print("\n");
- }while(l++->type);
+ sub = 0;
+ reinst = (Reinst*)(reprog+1);
+ compile1(parsetr, reinst, &sub, nl);
+ return reinst;
}
-#endif
-static Reclass*
-newclass(void)
+static void
+getnextr(Parselex *l)
{
- if(nclass >= nelem(reprog->class))
- rcerror("too many character classes; increase Reprog.class size");
- return &(classp[nclass++]);
+ l->literal = 0;
+ if(l->done) {
+ l->rune = 0;
+ return;
+ }
+ l->rawexp += chartorune(&l->rune, l->rawexp);
+ if(l->rune == L'\\') {
+ l->rawexp += chartorune(&l->rune, l->rawexp);
+ l->literal = 1;
+ }
+ if(*l->rawexp == 0)
+ l->done = 1;
+ return;
}
-static int
-nextc(Rune *rp)
+static void
+getnextrlit(Parselex *l)
{
- if(lexdone){
- *rp = 0;
- return 1;
+ l->literal = 1;
+ if(l->done) {
+ l->literal = 0;
+ l->rune = 0;
+ return;
}
- exprp += chartorune(rp, exprp);
- if(*rp == L'\\'){
- exprp += chartorune(rp, exprp);
- return 1;
- }
- if(*rp == 0)
- lexdone = 1;
- return 0;
+ l->rawexp += chartorune(&l->rune, l->rawexp);
+ if(*l->rawexp == 0)
+ l->done = 1;
+ return;
}
-static int
-lex(int literal, int dot_type)
+static int
+lex(Parselex *l)
{
- int quoted;
-
- quoted = nextc(&yyrune);
- if(literal || quoted){
- if(yyrune == 0)
- return END;
- return RUNE;
+ if(l->peek) {
+ l->peek = 0;
+ return l->peeklex;
}
-
- switch(yyrune){
+ l->getnextr(l);
+ if(l->literal)
+ return l->peeklex = LRUNE;
+ switch(l->rune){
case 0:
- return END;
+ return l->peeklex = LEND;
case L'*':
- return STAR;
case L'?':
- return QUEST;
case L'+':
- return PLUS;
+ return l->peeklex = LREP;
case L'|':
- return OR;
+ return l->peeklex = LOR;
case L'.':
- return dot_type;
+ return l->peeklex = LANY;
case L'(':
- return LBRA;
+ return l->peeklex = LLPAR;
case L')':
- return RBRA;
+ return l->peeklex = LRPAR;
case L'^':
- return BOL;
+ return l->peeklex = LBOL;
case L'$':
- return EOL;
+ return l->peeklex = LEOL;
case L'[':
- return bldcclass();
+ getclass(l);
+ return l->peeklex = LCLASS;
}
- return RUNE;
+ return l->peeklex = LRUNE;
}
static int
-bldcclass(void)
+pcmp(void *va, void *vb)
{
- int type;
- Rune r[NCCRUNE];
- Rune *p, *ep, *np;
- Rune rune;
- int quoted;
+ vlong n;
+ Rune *a, *b;
- /* we have already seen the '[' */
- type = CCLASS;
- yyclassp = newclass();
+ a = va;
+ b = vb;
- /* look ahead for negation */
- /* SPECIAL CASE!!! negated classes don't match \n */
- ep = r;
- quoted = nextc(&rune);
- if(!quoted && rune == L'^'){
- type = NCCLASS;
- quoted = nextc(&rune);
- *ep++ = L'\n';
- *ep++ = L'\n';
- }
+ n = (vlong)b[0] - (vlong)a[0];
+ if(n)
+ return n;
+ return (vlong)b[1] - (vlong)a[1];
+}
- /* parse class into a set of spans */
- while(ep < &r[NCCRUNE-1]){
- if(rune == 0){
- rcerror("malformed '[]'");
- return 0;
- }
- if(!quoted && rune == L']')
+static void
+getclass(Parselex *l)
+{
+ Rune *p, *q, t;
+
+ l->nc = 0;
+ getnextrlit(l);
+ if(l->rune == L'^') {
+ l->nc = 1;
+ getnextrlit(l);
+ }
+ p = l->cpairs;
+ for(;;) {
+ *p = l->rune;
+ if(l->rune == L']')
break;
- if(!quoted && rune == L'-'){
- if(ep == r){
- rcerror("malformed '[]'");
- return 0;
+ if(l->rune == L'-') {
+ regerror("Malformed class");
+ longjmp(l->exitenv, 1);
+ }
+ if(l->rune == '\\') {
+ getnextrlit(l);
+ *p = l->rune;
+ }
+ if(l->rune == 0) {
+ regerror("No closing ] for class");
+ longjmp(l->exitenv, 1);
+ }
+ getnextrlit(l);
+ if(l->rune == L'-') {
+ getnextrlit(l);
+ if(l->rune == L']') {
+ regerror("Malformed class");
+ longjmp(l->exitenv, 1);
}
- quoted = nextc(&rune);
- if((!quoted && rune == L']') || rune == 0){
- rcerror("malformed '[]'");
- return 0;
+ if(l->rune == L'-') {
+ regerror("Malformed class");
+ longjmp(l->exitenv, 1);
}
- *(ep-1) = rune;
- } else {
- *ep++ = rune;
- *ep++ = rune;
+ if(l->rune == L'\\')
+ getnextrlit(l);
+ p[1] = l->rune;
+ if(p[0] > p[1]) {
+ t = p[0];
+ p[0] = p[1];
+ p[1] = t;
+ }
+ getnextrlit(l);
+ } else
+ p[1] = p[0];
+ if(p >= l->cpairs + nelem(l->cpairs) - 2) {
+ regerror("Class too big\n");
+ longjmp(l->exitenv, 1);
}
- quoted = nextc(&rune);
+ p += 2;
}
- if(ep >= &r[NCCRUNE-1]) {
- rcerror("char class too large; increase Reclass.spans size");
- return 0;
+ *p = L'\0';
+ qsort(l->cpairs, (p - l->cpairs)/2, 2*sizeof(*l->cpairs), pcmp);
+ q = l->cpairs;
+ for(p = l->cpairs+2; *p != 0; p += 2) {
+ if(p[1] < q[0] - 1) {
+ q[2] = p[0];
+ q[3] = p[1];
+ q += 2;
+ continue;
+ }
+ q[0] = p[0];
+ if(p[1] > q[1])
+ q[1] = p[1];
}
-
- /* sort on span start */
- for(p = r; p < ep; p += 2){
- for(np = p; np < ep; np += 2)
- if(*np < *p){
- rune = np[0];
- np[0] = p[0];
- p[0] = rune;
- rune = np[1];
- np[1] = p[1];
- p[1] = rune;
- }
- }
-
- /* merge spans */
- np = yyclassp->spans;
- p = r;
- if(r == ep)
- yyclassp->end = np;
- else {
- np[0] = *p++;
- np[1] = *p++;
- for(; p < ep; p += 2)
- /* overlapping or adjacent ranges? */
- if(p[0] <= np[1] + 1){
- if(p[1] >= np[1])
- np[1] = p[1]; /* coalesce */
- } else {
- np += 2;
- np[0] = p[0];
- np[1] = p[1];
- }
- yyclassp->end = np+2;
- }
-
- return type;
+ q[2] = 0;
}
-static Reprog*
-regcomp1(char *s, int literal, int dot_type)
+/* classes are in descending order */
+static Renode*
+buildclassn(Parselex *l)
{
- int token;
- Reprog *pp;
+ Renode *n;
+ Rune *p;
+ int i;
- /* get memory for the program */
- pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
- if(pp == 0){
- regerror("out of memory");
- return 0;
- }
- freep = pp->firstinst;
- classp = pp->class;
- errors = 0;
+ i = 0;
+ p = l->cpairs;
+ n = node(l, TCLASS, nil, nil);
+ n->r = p[1] + 1;
+ n->r1 = Runemax;
+ n->nclass = i++;
- if(setjmp(regkaboom))
- goto out;
-
- /* go compile the sucker */
- lexdone = 0;
- exprp = s;
- nclass = 0;
- nbra = 0;
- atorp = atorstack;
- andp = andstack;
- subidp = subidstack;
- lastwasand = FALSE;
- cursubid = 0;
-
- /* Start with a low priority operator to prime parser */
- pushator(START-1);
- while((token = lex(literal, dot_type)) != END){
- if((token&0300) == OPERATOR)
- operator(token);
- else
- operand(token);
+ for(; *p != 0; p += 2) {
+ n = node(l, TCLASS, n, nil);
+ n->r = p[3] + 1;
+ n->r1 = p[0] - 1;
+ n->nclass = i++;
}
-
- /* Close with a low priority operator */
- evaluntil(START);
-
- /* Force END */
- operand(END);
- evaluntil(START);
-#ifdef DEBUG
- dumpstack();
-#endif
- if(nbra)
- rcerror("unmatched left paren");
- --andp; /* points to first and only operand */
- pp->startinst = andp->first;
-#ifdef DEBUG
- dump(pp);
-#endif
- pp = optimize(pp);
-#ifdef DEBUG
- print("start: %d\n", andp->first-pp->firstinst);
- dump(pp);
-#endif
-out:
- if(errors){
- free(pp);
- pp = 0;
- }
- return pp;
+ n->r = 0;
+ return node(l, TCAT, node(l, TNOTNL, nil, nil), n);
}
-extern Reprog*
-regcomp(char *s)
+static Renode*
+buildclass(Parselex *l)
{
- return regcomp1(s, 0, ANY);
-}
+ Renode *n;
+ Rune *p;
+ int i;
-extern Reprog*
-regcomplit(char *s)
-{
- return regcomp1(s, 1, ANY);
+ i = 0;
+ n = node(l, TCLASS, nil, nil);
+ n->r = Runemax + 1;
+ n->nclass = i++;
+
+ for(p = l->cpairs; *p != 0; p += 2) {
+ n = node(l, TCLASS, n, nil);
+ n->r = p[0];
+ n->r1 = p[1];
+ n->nclass = i++;
+ }
+ return n;
}
-extern Reprog*
-regcompnl(char *s)
+static void
+prtree(Renode *tree, int d, int f)
{
- return regcomp1(s, 0, ANYNL);
+ int i;
+
+ if(tree == nil)
+ return;
+ if(f)
+ for(i = 0; i < d; i++)
+ print("\t");
+ switch(tree->op) {
+ case TCAT:
+ prtree(tree->left, d, 0);
+ prtree(tree->right, d, 1);
+ break;
+ case TOR:
+ print("TOR\n");
+ prtree(tree->left, d+1, 1);
+ for(i = 0; i < d; i++)
+ print("\t");
+ print("|\n");
+ prtree(tree->right, d+1, 1);
+ break;
+ case TSTAR:
+ print("*\n");
+ prtree(tree->left, d+1, 1);
+ break;
+ case TPLUS:
+ print("+\n");
+ prtree(tree->left, d+1, 1);
+ break;
+ case TQUES:
+ print("?\n");
+ prtree(tree->left, d+1, 1);
+ break;
+ case TANY:
+ print(".\n");
+ prtree(tree->left, d+1, 1);
+ break;
+ case TBOL:
+ print("^\n");
+ break;
+ case TEOL:
+ print("$\n");
+ break;
+ case TSUB:
+ print("TSUB\n");
+ prtree(tree->left, d+1, 1);
+ break;
+ case TRUNE:
+ print("TRUNE: %C\n", tree->r);
+ break;
+ case TNOTNL:
+ print("TNOTNL: !\\n\n");
+ break;
+ case TCLASS:
+ print("CLASS: %C-%C\n", tree->r, tree->r1);
+ prtree(tree->left, d, 1);
+ break;
+ }
}
--- a/sys/src/libregexp/regcomp.h
+++ /dev/null
@@ -1,63 +1,0 @@
-/*
- * substitution list
- */
-#define NSUBEXP 32
-typedef struct Resublist Resublist;
-struct Resublist
-{
- Resub m[NSUBEXP];
-};
-
-/*
- * Actions and Tokens (Reinst types)
- *
- * 02xx are operators, value == precedence
- * 03xx are tokens, i.e. operands for operators
- */
-#define RUNE 0177
-#define OPERATOR 0200 /* Bitmask of all operators */
-#define START 0200 /* Start, used for marker on stack */
-#define RBRA 0201 /* Right bracket, ) */
-#define LBRA 0202 /* Left bracket, ( */
-#define OR 0203 /* Alternation, | */
-#define CAT 0204 /* Concatentation, implicit operator */
-#define STAR 0205 /* Closure, * */
-#define PLUS 0206 /* a+ == aa* */
-#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */
-#define ANY 0300 /* Any character except newline, . */
-#define ANYNL 0301 /* Any character including newline, . */
-#define NOP 0302 /* No operation, internal use only */
-#define BOL 0303 /* Beginning of line, ^ */
-#define EOL 0304 /* End of line, $ */
-#define CCLASS 0305 /* Character class, [] */
-#define NCCLASS 0306 /* Negated character class, [] */
-#define END 0377 /* Terminate: match found */
-
-/*
- * regexec execution lists
- */
-#define LISTSIZE 10
-#define BIGLISTSIZE (25*LISTSIZE)
-typedef struct Relist Relist;
-struct Relist
-{
- Reinst* inst; /* Reinstruction of the thread */
- Resublist se; /* matched subexpressions in this thread */
-};
-typedef struct Reljunk Reljunk;
-struct Reljunk
-{
- Relist* relist[2];
- Relist* reliste[2];
- int starttype;
- Rune startchar;
- char* starts;
- char* eol;
- Rune* rstarts;
- Rune* reol;
-};
-
-extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*);
-extern void _renewmatch(Resub*, int, Resublist*);
-extern Relist* _renewemptythread(Relist*, Reinst*, int, char*);
-extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*);
--- a/sys/src/libregexp/regerror.c
+++ b/sys/src/libregexp/regerror.c
@@ -1,6 +1,6 @@
#include <u.h>
#include <libc.h>
-#include "regexp.h"
+#include <regexp.h>
void
regerror(char *s)
--- a/sys/src/libregexp/regexec.c
+++ b/sys/src/libregexp/regexec.c
@@ -1,232 +1,201 @@
#include <u.h>
#include <libc.h>
-#include "regexp.h"
-#include "regcomp.h"
+#include <regexp.h>
+#include "regimpl.h"
+typedef struct RethreadQ RethreadQ;
+struct RethreadQ
+{
+ Rethread *head;
+ Rethread **tail;
+};
-/*
- * return 0 if no match
- * >0 if a match
- * <0 if we ran out of _relist space
- */
-static int
-regexec1(Reprog *progp, /* program to run */
- char *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- Reljunk *j
-)
+int
+regexec(Reprog *prog, char *str, Resub *sem, int msize)
{
- int flag=0;
- Reinst *inst;
- Relist *tlp;
- char *s;
- int i, checkstart;
- Rune r, *rp, *ep;
- int n;
- Relist* tl; /* This list, next list */
- Relist* nl;
- Relist* tle; /* ends of this and next list */
- Relist* nle;
- int match;
- char *p;
+ RethreadQ lists[2], *clist, *nlist, *tmp;
+ Rethread *t, *next, *pooltop, *avail;
+ Reinst *curinst;
+ Rune r;
+ char *sp, *ep, endc;
+ int i, match, first, gen, matchpri, pri;
- match = 0;
- checkstart = j->starttype;
- if(mp)
- for(i=0; i<ms; i++) {
- mp[i].sp = 0;
- mp[i].ep = 0;
- }
- j->relist[0][0].inst = 0;
- j->relist[1][0].inst = 0;
+ if(msize > NSUBEXPM)
+ msize = NSUBEXPM;
- /* Execute machine once for each character, including terminal NUL */
- s = j->starts;
- do{
- /* fast check for first char */
- if(checkstart) {
- switch(j->starttype) {
- case RUNE:
- p = utfrune(s, j->startchar);
- if(p == 0 || s == j->eol)
- return match;
- s = p;
- break;
- case BOL:
- if(s == bol)
- break;
- p = utfrune(s, '\n');
- if(p == 0 || s == j->eol)
- return match;
- s = p+1;
- break;
- }
- }
- r = *(uchar*)s;
- if(r < Runeself)
- n = 1;
- else
- n = chartorune(&r, s);
+ if(prog->startinst->gen != 0) {
+ for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
+ curinst->gen = 0;
+ }
- /* switch run lists */
- tl = j->relist[flag];
- tle = j->reliste[flag];
- nl = j->relist[flag^=1];
- nle = j->reliste[flag];
- nl->inst = 0;
+ clist = lists;
+ clist->head = nil;
+ clist->tail = &clist->head;
+ nlist = lists + 1;
+ nlist->head = nil;
+ nlist->tail = &nlist->head;
- /* Add first instruction to current list */
- if(match == 0)
- _renewemptythread(tl, progp->startinst, ms, s);
+ pooltop = prog->threads + prog->nthr;
+ avail = nil;
- /* Execute machine until current list is empty */
- for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
- for(inst = tlp->inst; ; inst = inst->next){
- switch(inst->type){
- case RUNE: /* regular character */
- if(inst->r == r){
- if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
- return -1;
- }
- break;
- case LBRA:
- tlp->se.m[inst->subid].sp = s;
- continue;
- case RBRA:
- tlp->se.m[inst->subid].ep = s;
- continue;
- case ANY:
- if(r != '\n')
- if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
- return -1;
- break;
- case ANYNL:
- if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
- return -1;
- break;
- case BOL:
- if(s == bol || *(s-1) == '\n')
- continue;
- break;
- case EOL:
- if(s == j->eol || r == 0 || r == '\n')
- continue;
- break;
- case CCLASS:
- ep = inst->cp->end;
- for(rp = inst->cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
- return -1;
- break;
- }
- break;
- case NCCLASS:
- ep = inst->cp->end;
- for(rp = inst->cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1])
- break;
- if(rp == ep)
- if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
- return -1;
- break;
- case OR:
- /* evaluate right choice later */
- if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
- return -1;
- /* efficiency: advance and re-evaluate */
- continue;
- case END: /* Match! */
- match = 1;
- tlp->se.m[0].ep = s;
- if(mp != 0)
- _renewmatch(mp, ms, &tlp->se);
- break;
+ pri = matchpri = gen = match = 0;
+ sp = str;
+ ep = nil;
+ endc = '\0';
+ if(sem != nil && msize > 0) {
+ if(sem->sp != nil)
+ sp = sem->sp;
+ if(sem->ep != nil && *sem->ep != '\0') {
+ ep = sem->ep;
+ endc = *sem->ep;
+ *sem->ep = '\0';
+ }
+ }
+ r = Runemax + 1;
+ for(; r != L'\0'; sp += i) {
+ gen++;
+ i = chartorune(&r, sp);
+ first = 1;
+ t = clist->head;
+ if(t == nil)
+ goto Start;
+ curinst = t->pc;
+Again:
+ if(curinst->gen == gen)
+ goto Done;
+ curinst->gen = gen;
+ switch(curinst->op) {
+ case ORUNE:
+ if(r != curinst->r)
+ goto Done;
+ case OANY: /* fallthrough */
+ Any:
+ next = t->next;
+ t->pc = curinst + 1;
+ t->next = nil;
+ *nlist->tail = t;
+ nlist->tail = &t->next;
+ if(next == nil)
+ break;
+ t = next;
+ curinst = t->pc;
+ goto Again;
+ case OCLASS:
+ Class:
+ if(r < curinst->r)
+ goto Done;
+ if(r > curinst->r1) {
+ curinst++;
+ goto Class;
+ }
+ next = t->next;
+ t->pc = curinst->a;
+ t->next = nil;
+ *nlist->tail = t;
+ nlist->tail = &t->next;
+ if(next == nil)
+ break;
+ t = next;
+ curinst = t->pc;
+ goto Again;
+ case ONOTNL:
+ if(r != L'\n') {
+ curinst++;
+ goto Again;
+ }
+ goto Done;
+ case OBOL:
+ if(sp == str || sp[-1] == '\n') {
+ curinst++;
+ goto Again;
+ }
+ goto Done;
+ case OEOL:
+ if(r == L'\0' && ep == nil) {
+ curinst++;
+ goto Again;
+ }
+ if(r == L'\n')
+ goto Any;
+ goto Done;
+ case OJMP:
+ curinst = curinst->a;
+ goto Again;
+ case OSPLIT:
+ if(avail == nil)
+ next = --pooltop;
+ else {
+ next = avail;
+ avail = avail->next;
+ }
+ next->pc = curinst->b;
+ if(msize > 0)
+ memcpy(next->sem, t->sem, sizeof(Resub)*msize);
+ next->pri = t->pri;
+ next->next = t->next;
+ t->next = next;
+ curinst = curinst->a;
+ goto Again;
+ case OSAVE:
+ if(curinst->sub < msize)
+ t->sem[curinst->sub].sp = sp;
+ curinst++;
+ goto Again;
+ case OUNSAVE:
+ if(curinst->sub == 0) {
+ /* "Highest" priority is the left-most longest. */
+ if (t->pri > matchpri)
+ goto Done;
+ match = 1;
+ matchpri = t->pri;
+ if(sem != nil && msize > 0) {
+ memcpy(sem, t->sem, sizeof(Resub)*msize);
+ sem->ep = sp;
}
+ goto Done;
+ }
+ if(curinst->sub < msize)
+ t->sem[curinst->sub].ep = sp;
+ curinst++;
+ goto Again;
+ Done:
+ next = t->next;
+ t->next = avail;
+ avail = t;
+ if(next == nil)
break;
+ t = next;
+ curinst = t->pc;
+ goto Again;
+ }
+Start:
+ /* Start again once if we haven't found anything. */
+ if(first == 1 && match == 0) {
+ first = 0;
+ if(avail == nil)
+ t = --pooltop;
+ else {
+ t = avail;
+ avail = avail->next;
}
+ if(msize > 0)
+ memset(t->sem, 0, sizeof(Resub)*msize);
+ /* "Lower" priority thread */
+ t->pri = matchpri = pri++;
+ t->next = nil;
+ curinst = prog->startinst;
+ goto Again;
}
- if(s == j->eol)
+ /* If we have a match and no extant threads, we are done. */
+ if(match == 1 && nlist->head == nil)
break;
- checkstart = j->starttype && nl->inst==0;
- s += n;
- }while(r);
- return match;
-}
-
-static int
-regexec2(Reprog *progp, /* program to run */
- char *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- Reljunk *j
-)
-{
- int rv;
- Relist *relist0, *relist1;
-
- /* mark space */
- relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
- if(relist0 == nil)
- return -1;
- relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
- if(relist1 == nil){
- free(relist1);
- return -1;
+ tmp = clist;
+ clist = nlist;
+ nlist = tmp;
+ nlist->head = nil;
+ nlist->tail = &nlist->head;
}
- j->relist[0] = relist0;
- j->relist[1] = relist1;
- j->reliste[0] = relist0 + BIGLISTSIZE - 2;
- j->reliste[1] = relist1 + BIGLISTSIZE - 2;
-
- rv = regexec1(progp, bol, mp, ms, j);
- free(relist0);
- free(relist1);
- return rv;
-}
-
-extern int
-regexec(Reprog *progp, /* program to run */
- char *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements at mp */
-{
- Reljunk j;
- Relist relist0[LISTSIZE], relist1[LISTSIZE];
- int rv;
-
- /*
- * use user-specified starting/ending location if specified
- */
- j.starts = bol;
- j.eol = 0;
- if(mp && ms>0){
- if(mp->sp)
- j.starts = mp->sp;
- if(mp->ep)
- j.eol = mp->ep;
- }
- j.starttype = 0;
- j.startchar = 0;
- if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
- j.starttype = RUNE;
- j.startchar = progp->startinst->r;
- }
- if(progp->startinst->type == BOL)
- j.starttype = BOL;
-
- /* mark space */
- j.relist[0] = relist0;
- j.relist[1] = relist1;
- j.reliste[0] = relist0 + nelem(relist0) - 2;
- j.reliste[1] = relist1 + nelem(relist1) - 2;
-
- rv = regexec1(progp, bol, mp, ms, &j);
- if(rv >= 0)
- return rv;
- rv = regexec2(progp, bol, mp, ms, &j);
- if(rv >= 0)
- return rv;
- return -1;
+ if(ep != nil)
+ *ep = endc;
+ return match;
}
--- /dev/null
+++ b/sys/src/libregexp/regimpl.h
@@ -1,0 +1,104 @@
+enum
+{
+ LANY = 0,
+ LBOL,
+ LCLASS,
+ LEND,
+ LEOL,
+ LLPAR,
+ LOR,
+ LREP,
+ LRPAR,
+ LRUNE,
+
+ TANY = 0,
+ TBOL,
+ TCAT,
+ TCLASS,
+ TEOL,
+ TNOTNL,
+ TOR,
+ TPLUS,
+ TQUES,
+ TRUNE,
+ TSTAR,
+ TSUB,
+
+ NSUBEXPM = 32
+};
+
+typedef struct Parselex Parselex;
+typedef struct Renode Renode;
+
+struct Parselex
+{
+ /* Parse */
+ Renode *next;
+ Renode *nodes;
+ int sub;
+ int instrs;
+ jmp_buf exitenv;
+ /* Lex */
+ void (*getnextr)(Parselex*);
+ char *rawexp;
+ char *orig;
+ Rune rune;
+ Rune peek;
+ int peeklex;
+ int done;
+ int literal;
+ Rune cpairs[400+2];
+ int nc;
+};
+struct Renode
+{
+ int op;
+ Renode *left;
+ Rune r;
+ union
+ {
+ Rune r1;
+ int sub;
+ Renode *right;
+ };
+ int nclass;
+};
+struct Rethread
+{
+ Reinst *pc;
+ Resub sem[NSUBEXPM];
+ int pri;
+ Rethread *next;
+};
+struct Reinst
+{
+ char op;
+ int gen;
+ Reinst *a;
+ union
+ {
+ Rune r;
+ int sub;
+ };
+ union
+ {
+ Rune r1;
+ Reinst *b;
+ };
+};
+
+static int lex(Parselex*);
+static void getnextr(Parselex*);
+static void getnextrlit(Parselex*);
+static void getclass(Parselex*);
+static Renode *e0(Parselex*);
+static Renode *e1(Parselex*);
+static Renode *e2(Parselex*);
+static Renode *e3(Parselex*);
+static Renode *buildclass(Parselex*);
+static Renode *buildclassn(Parselex*);
+static int pcmp(void*, void*);
+static Reprog *regcomp1(char*, int, int);
+static Reinst *compile(Renode*, Reprog*, int);
+static Reinst *compile1(Renode*, Reinst*, int*, int);
+static void prtree(Renode*, int, int);
--- /dev/null
+++ b/sys/src/libregexp/regprint.c
@@ -1,0 +1,66 @@
+#include <u.h>
+#include <libc.h>
+#include <regexp.h>
+#include <regimpl.h>
+
+static int
+fmtprinst(Fmt *f, Reinst *inst)
+{
+ int r;
+
+ r = fmtprint(f, "%p ", inst);
+ switch(inst->op) {
+ case ORUNE:
+ r += fmtprint(f, "ORUNE\t%C\n", inst->r);
+ break;
+ case ONOTNL:
+ r += fmtprint(f, "ONOTNL\n");
+ break;
+ case OCLASS:
+ r += fmtprint(f, "OCLASS\t%C-%C %p\n", inst->r, inst->r1, inst->a);
+ break;
+ case OSPLIT:
+ r += fmtprint(f, "OSPLIT\t%p %p\n", inst->a, inst->b);
+ break;
+ case OJMP:
+ r += fmtprint(f, "OJMP \t%p\n", inst->a);
+ break;
+ case OSAVE:
+ r += fmtprint(f, "OSAVE\t%d\n", inst->sub);
+ break;
+ case OUNSAVE:
+ r += fmtprint(f, "OUNSAVE\t%d\n", inst->sub);
+ break;
+ case OANY:
+ r += fmtprint(f, "OANY \t.\n");
+ break;
+ case OEOL:
+ r += fmtprint(f, "OEOL \t$\n");
+ break;
+ case OBOL:
+ r += fmtprint(f, "OBOL \t^\n");
+ break;
+ }
+ return r;
+}
+
+static int
+fmtprprog(Fmt *f, Reprog *reprog)
+{
+ Reinst *inst;
+ int r;
+
+ r = 0;
+ for(inst = reprog->startinst; inst < reprog->startinst + reprog->len; inst++)
+ r += fmtprinst(f, inst);
+ return r;
+}
+
+int
+reprogfmt(Fmt *f)
+{
+ Reprog *r;
+
+ r = va_arg(f->args, Reprog*);
+ return fmtprprog(f, r);
+}
--- a/sys/src/libregexp/regsub.c
+++ b/sys/src/libregexp/regsub.c
@@ -1,63 +1,66 @@
#include <u.h>
#include <libc.h>
-#include "regexp.h"
+#include <regexp.h>
-/* substitute into one string using the matches from the last regexec() */
-extern void
-regsub(char *sp, /* source string */
- char *dp, /* destination string */
- int dlen,
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements pointed to by mp */
+void
+regsub(char *src, char *dst, int dlen, Resub *match, int msize)
{
- char *ssp, *ep;
int i;
+ char *ep, c;
- ep = dp+dlen-1;
- while(*sp != '\0'){
- if(*sp == '\\'){
- switch(*++sp){
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- i = *sp-'0';
- if(mp!=0 && mp[i].sp != 0 && ms>i)
- for(ssp = mp[i].sp;
- ssp < mp[i].ep;
- ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- break;
- case '\\':
- if(dp < ep)
- *dp++ = '\\';
- break;
- case '\0':
- sp--;
- break;
- default:
- if(dp < ep)
- *dp++ = *sp;
- break;
+ ep = dst + dlen-1;
+ for(;*src != '\0'; src++) switch(*src) {
+ case '\\':
+ switch(*++src) {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ i = *src - '0';
+ if(match != nil && i < msize && match[i].ep != nil) {
+ c = *match[i].ep;
+ *match[i].ep = '\0';
+ dst = strecpy(dst, ep+1, match[i].sp);
+ *match[i].ep = c;
}
- }else if(*sp == '&'){
- if(mp!=0 && mp[0].sp != 0 && ms>0)
- for(ssp = mp[0].sp;
- ssp < mp[0].ep; ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- }else{
- if(dp < ep)
- *dp++ = *sp;
+ break;
+ case '\\':
+ if(dst < ep)
+ *dst++ = '\\';
+ else
+ goto End;
+ break;
+ case '\0':
+ goto End;
+ default:
+ if(dst < ep)
+ *dst++ = *src;
+ else
+ goto End;
+ break;
}
- sp++;
+ break;
+ case '&':
+ if(match != nil && msize > 0 && match[0].sp != nil) {
+ c = *match[0].ep;
+ *match[0].ep = '\0';
+ dst = strecpy(dst, ep+1, match[0].sp);
+ *match[0].ep = c;
+ }
+ break;
+ default:
+ if(dst < ep)
+ *dst++ = *src;
+ else
+ goto End;
+ break;
}
- *dp = '\0';
+End:
+ *dst = '\0';
}
--- a/sys/src/libregexp/rregexec.c
+++ b/sys/src/libregexp/rregexec.c
@@ -1,212 +1,200 @@
#include <u.h>
#include <libc.h>
-#include "regexp.h"
-#include "regcomp.h"
+#include <regexp.h>
+#include "regimpl.h"
-/*
- * return 0 if no match
- * >0 if a match
- * <0 if we ran out of _relist space
- */
-static int
-rregexec1(Reprog *progp, /* program to run */
- Rune *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- Reljunk *j)
+typedef struct RethreadQ RethreadQ;
+struct RethreadQ
{
- int flag=0;
- Reinst *inst;
- Relist *tlp;
- Rune *s;
- int i, checkstart;
- Rune r, *rp, *ep;
- Relist* tl; /* This list, next list */
- Relist* nl;
- Relist* tle; /* ends of this and next list */
- Relist* nle;
- int match;
- Rune *p;
+ Rethread *head;
+ Rethread **tail;
+};
- match = 0;
- checkstart = j->startchar;
- if(mp)
- for(i=0; i<ms; i++) {
- mp[i].rsp = 0;
- mp[i].rep = 0;
- }
- j->relist[0][0].inst = 0;
- j->relist[1][0].inst = 0;
+int
+rregexec(Reprog *prog, Rune *str, Resub *sem, int msize)
+{
+ RethreadQ lists[2], *clist, *nlist, *tmp;
+ Rethread *t, *next, *pooltop, *avail;
+ Reinst *curinst;
+ Rune *rsp, *rep, endr, last;
+ int match, first, gen, pri, matchpri;
- /* Execute machine once for each character, including terminal NUL */
- s = j->rstarts;
- do{
- /* fast check for first char */
- if(checkstart) {
- switch(j->starttype) {
- case RUNE:
- p = runestrchr(s, j->startchar);
- if(p == 0 || s == j->reol)
- return match;
- s = p;
- break;
- case BOL:
- if(s == bol)
- break;
- p = runestrchr(s, '\n');
- if(p == 0 || s == j->reol)
- return match;
- s = p+1;
- break;
- }
- }
+ if(msize > NSUBEXPM)
+ msize = NSUBEXPM;
- r = *s;
+ if(prog->startinst->gen != 0) {
+ for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
+ curinst->gen = 0;
+ }
- /* switch run lists */
- tl = j->relist[flag];
- tle = j->reliste[flag];
- nl = j->relist[flag^=1];
- nle = j->reliste[flag];
- nl->inst = 0;
+ clist = lists;
+ clist->head = nil;
+ clist->tail = &clist->head;
+ nlist = lists + 1;
+ nlist->head = nil;
+ nlist->tail = &nlist->head;
- /* Add first instruction to current list */
- _rrenewemptythread(tl, progp->startinst, ms, s);
+ pooltop = prog->threads + prog->nthr;
+ avail = nil;
- /* Execute machine until current list is empty */
- for(tlp=tl; tlp->inst; tlp++){
- for(inst=tlp->inst; ; inst = inst->next){
- switch(inst->type){
- case RUNE: /* regular character */
- if(inst->r == r)
- if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
- return -1;
- break;
- case LBRA:
- tlp->se.m[inst->subid].rsp = s;
- continue;
- case RBRA:
- tlp->se.m[inst->subid].rep = s;
- continue;
- case ANY:
- if(r != '\n')
- if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
- return -1;
- break;
- case ANYNL:
- if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
- return -1;
- break;
- case BOL:
- if(s == bol || *(s-1) == '\n')
- continue;
- break;
- case EOL:
- if(s == j->reol || r == 0 || r == '\n')
- continue;
- break;
- case CCLASS:
- ep = inst->cp->end;
- for(rp = inst->cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
- return -1;
- break;
- }
- break;
- case NCCLASS:
- ep = inst->cp->end;
- for(rp = inst->cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1])
- break;
- if(rp == ep)
- if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
- return -1;
- break;
- case OR:
- /* evaluate right choice later */
- if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
- return -1;
- /* efficiency: advance and re-evaluate */
- continue;
- case END: /* Match! */
- match = 1;
- tlp->se.m[0].rep = s;
- if(mp != 0)
- _renewmatch(mp, ms, &tlp->se);
- break;
+ pri = matchpri = gen = match = 0;
+ rsp = str;
+ rep = nil;
+ endr = L'\0';
+ if(sem != nil && msize > 0) {
+ if(sem->rsp != nil)
+ rsp = sem->rsp;
+ if(sem->rep != nil && *sem->rep != L'\0') {
+ rep = sem->rep;
+ endr = *sem->rep;
+ *sem->rep = '\0';
+ }
+ }
+ last = 1;
+ for(; last != L'\0'; rsp++) {
+ gen++;
+ last = *rsp;
+ first = 1;
+ t = clist->head;
+ if(t == nil)
+ goto Start;
+ curinst = t->pc;
+Again:
+ if(curinst->gen == gen)
+ goto Done;
+ curinst->gen = gen;
+ switch(curinst->op) {
+ case ORUNE:
+ if(*rsp != curinst->r)
+ goto Done;
+ case OANY: /* fallthrough */
+ Any:
+ next = t->next;
+ t->pc = curinst + 1;
+ t->next = nil;
+ *nlist->tail = t;
+ nlist->tail = &t->next;
+ if(next == nil)
+ break;
+ t = next;
+ curinst = t->pc;
+ goto Again;
+ case OCLASS:
+ Class:
+ if(*rsp < curinst->r)
+ goto Done;
+ if(*rsp > curinst->r1) {
+ curinst++;
+ goto Class;
+ }
+ next = t->next;
+ t->pc = curinst->a;
+ t->next = nil;
+ *nlist->tail = t;
+ nlist->tail = &t->next;
+ if(next == nil)
+ break;
+ t = next;
+ curinst = t->pc;
+ goto Again;
+ case ONOTNL:
+ if(*rsp != L'\n') {
+ curinst++;
+ goto Again;
+ }
+ goto Done;
+ case OBOL:
+ if(rsp == str || rsp[-1] == L'\n') {
+ curinst++;
+ goto Again;
+ }
+ goto Done;
+ case OEOL:
+ if(*rsp == L'\0' && rep == nil) {
+ curinst++;
+ goto Again;
+ }
+ if(*rsp == '\n')
+ goto Any;
+ goto Done;
+ case OJMP:
+ curinst = curinst->a;
+ goto Again;
+ case OSPLIT:
+ if(avail == nil)
+ next = --pooltop;
+ else {
+ next = avail;
+ avail = avail->next;
+ }
+ next->pc = curinst->b;
+ if(msize > 0)
+ memcpy(next->sem, t->sem, sizeof(Resub)*msize);
+ next->pri = t->pri;
+ next->next = t->next;
+ t->next = next;
+ curinst = curinst->a;
+ goto Again;
+ case OSAVE:
+ if(curinst->sub < msize)
+ t->sem[curinst->sub].rsp = rsp;
+ curinst++;
+ goto Again;
+ case OUNSAVE:
+ if(curinst->sub == 0) {
+ /* "Highest" priority is the left-most longest. */
+ if (t->pri > matchpri)
+ goto Done;
+ match = 1;
+ matchpri = t->pri;
+ if(sem != nil && msize > 0) {
+ memcpy(sem, t->sem, sizeof(Resub)*msize);
+ sem->rep = rsp;
}
+ goto Done;
+ }
+ if(curinst->sub < msize)
+ t->sem[curinst->sub].rep = rsp;
+ curinst++;
+ goto Again;
+ Done:
+ next = t->next;
+ t->next = avail;
+ avail = t;
+ if(next == nil)
break;
+ t = next;
+ curinst = t->pc;
+ goto Again;
+ }
+Start:
+ /* Start again once if we haven't found anything. */
+ if(first == 1 && match == 0) {
+ first = 0;
+ if(avail == nil)
+ t = --pooltop;
+ else {
+ t = avail;
+ avail = avail->next;
}
+ if(msize > 0)
+ memset(t->sem, 0, sizeof(Resub)*msize);
+ /* "Lower" priority thread */
+ t->pri = matchpri = pri++;
+ t->next = nil;
+ curinst = prog->startinst;
+ goto Again;
}
- if(s == j->reol)
+ /* If we have a match and no extant threads, we are done. */
+ if(match == 1 && nlist->head == nil)
break;
- checkstart = j->startchar && nl->inst==0;
- s++;
- }while(r);
- return match;
-}
-
-static int
-rregexec2(Reprog *progp, /* program to run */
- Rune *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- Reljunk *j
-)
-{
- Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
-
- /* mark space */
- j->relist[0] = relist0;
- j->relist[1] = relist1;
- j->reliste[0] = relist0 + nelem(relist0) - 2;
- j->reliste[1] = relist1 + nelem(relist1) - 2;
-
- return rregexec1(progp, bol, mp, ms, j);
-}
-
-extern int
-rregexec(Reprog *progp, /* program to run */
- Rune *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements at mp */
-{
- Reljunk j;
- Relist relist0[LISTSIZE], relist1[LISTSIZE];
- int rv;
-
- /*
- * use user-specified starting/ending location if specified
- */
- j.rstarts = bol;
- j.reol = 0;
- if(mp && ms>0){
- if(mp->sp)
- j.rstarts = mp->rsp;
- if(mp->ep)
- j.reol = mp->rep;
+ tmp = clist;
+ clist = nlist;
+ nlist = tmp;
+ nlist->head = nil;
+ nlist->tail = &nlist->head;
}
- j.starttype = 0;
- j.startchar = 0;
- if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
- j.starttype = RUNE;
- j.startchar = progp->startinst->r;
- }
- if(progp->startinst->type == BOL)
- j.starttype = BOL;
-
- /* mark space */
- j.relist[0] = relist0;
- j.relist[1] = relist1;
- j.reliste[0] = relist0 + nelem(relist0) - 2;
- j.reliste[1] = relist1 + nelem(relist1) - 2;
-
- rv = rregexec1(progp, bol, mp, ms, &j);
- if(rv >= 0)
- return rv;
- rv = rregexec2(progp, bol, mp, ms, &j);
- if(rv >= 0)
- return rv;
- return -1;
+ if(rep != nil)
+ *rep = endr;
+ return match;
}
--- a/sys/src/libregexp/rregsub.c
+++ b/sys/src/libregexp/rregsub.c
@@ -1,64 +1,66 @@
#include <u.h>
#include <libc.h>
-#include "regexp.h"
+#include <regexp.h>
-/* substitute into one string using the matches from the last regexec() */
-extern void
-rregsub(Rune *sp, /* source string */
- Rune *dp, /* destination string */
- int dlen,
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements pointed to by mp */
+void
+rregsub(Rune *src, Rune *dst, int dlen, Resub *match, int msize)
{
- Rune *ssp, *ep;
int i;
+ Rune *ep, r;
- ep = dp+(dlen/sizeof(Rune))-1;
- while(*sp != '\0'){
- if(*sp == '\\'){
- switch(*++sp){
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- i = *sp-'0';
- if(mp[i].rsp != 0 && mp!=0 && ms>i)
- for(ssp = mp[i].rsp;
- ssp < mp[i].rep;
- ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- break;
- case '\\':
- if(dp < ep)
- *dp++ = '\\';
- break;
- case '\0':
- sp--;
- break;
- default:
- if(dp < ep)
- *dp++ = *sp;
- break;
+ ep = dst + dlen-1;
+ for(;*src != L'\0'; src++) switch(*src) {
+ case L'\\':
+ switch(*++src) {
+ case L'0':
+ case L'1':
+ case L'2':
+ case L'3':
+ case L'4':
+ case L'5':
+ case L'6':
+ case L'7':
+ case L'8':
+ case L'9':
+ i = *src - L'0';
+ if(match != nil && i < msize && match[i].rsp != nil) {
+ r = *match[i].rep;
+ *match[i].rep = L'\0';
+ dst = runestrecpy(dst, ep+1, match[i].rsp);
+ *match[i].rep = r;
}
- }else if(*sp == '&'){
- if(mp[0].rsp != 0 && mp!=0 && ms>0)
- if(mp[0].rsp != 0)
- for(ssp = mp[0].rsp;
- ssp < mp[0].rep; ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- }else{
- if(dp < ep)
- *dp++ = *sp;
+ break;
+ case L'\\':
+ if(dst < ep)
+ *dst++ = L'\\';
+ else
+ goto End;
+ break;
+ case L'\0':
+ goto End;
+ default:
+ if(dst < ep)
+ *dst++ = *src;
+ else
+ goto End;
+ break;
}
- sp++;
+ break;
+ case L'&':
+ if(match != nil && msize > 0 && match[0].rsp != nil) {
+ r = *match[0].rep;
+ *match[0].rep = L'\0';
+ dst = runestrecpy(dst, ep+1, match[0].rsp);
+ *match[0].rep = r;
+ }
+ break;
+ default:
+ if(dst < ep)
+ *dst++ = *src;
+ else
+ goto End;
+ break;
}
- *dp = '\0';
+End:
+ *dst = L'\0';
}