shithub: riscv

Download patch

ref: 85824350b5f65053053245d141aaf7d668089d28
parent: 0f8168038af32828fcdc39575dea0e4de0c01122
author: ben <ben@rana>
date: Wed Apr 27 03:52:41 EDT 2016

remove ape regexp library, add utility for awk native port

--- a/sys/src/ape/lib/mkfile
+++ b/sys/src/ape/lib/mkfile
@@ -1,6 +1,6 @@
 </$objtype/mkfile
 
-DIRS=9 ap auth bio bsd bz2 draw fmt l mp net regexp sec utf v z
+DIRS=9 ap auth bio bsd bz2 draw fmt l mp net sec utf v z
 
 none:V:
 	echo mk all, install, installall, clean, or nuke
--- a/sys/src/ape/lib/regexp/mkfile
+++ /dev/null
@@ -1,15 +1,0 @@
-APE=/sys/src/ape
-<$APE/config
-
-LIB=/$objtype/lib/ape/libregexp.a
-OFILES=regcomp.$O\
-	regerror.$O\
-	regexec.$O\
-	regsub.$O\
-	regaux.$O\
-	rregexec.$O\
-	rregsub.$O\
-
-</sys/src/cmd/mksyslib
-
-CFLAGS=-c -DUTF -D_REGEXP_EXTENSION
--- a/sys/src/ape/lib/regexp/regaux.c
+++ /dev/null
@@ -1,56 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-/*
- *	Machine state
- */
-Relist*	_relist[2];
-Relist*	_reliste[2];
-int	_relistsize = LISTINCREMENT;
-
-/*
- *  save a new match in mp
- */
-extern void
-_renewmatch(Resub *mp, int ms, Resublist *sp)
-{
-	int i;
-
-	if(mp==0 || ms<=0)
-		return;
-	if(mp[0].s.sp==0 || sp->m[0].s.sp<mp[0].s.sp ||
-	   (sp->m[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){
-		for(i=0; i<ms && i<NSUBEXP; i++)
-			mp[i] = sp->m[i];
-		for(; i<ms; i++)
-			mp[i].s.sp = mp[i].e.ep = 0;
-	}
-}
-
-/*
- * Note optimization in _renewthread:
- * 	*lp must be pending when _renewthread called; if *l has been looked
- *		at already, the optimization is a bug.
- */
-extern Relist*
-_renewthread(Relist *lp,	/* _relist to add to */
-	Reinst *ip,	/* instruction to add */
-	Resublist *sep)	/* pointers to subexpressions */
-{
-	Relist *p;
-
-	for(p=lp; p->inst; p++){
-		if(p->inst == ip){
-			if((sep)->m[0].s.sp < p->se.m[0].s.sp)
-				p->se = *sep;
-			return 0;
-		}
-	}
-	p->inst = ip;
-	p->se = *sep;
-	(++p)->inst = 0;
-	return p;
-}
-
--- a/sys/src/ape/lib/regexp/regcomp.c
+++ /dev/null
@@ -1,560 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <setjmp.h>
-#include <string.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-#define	TRUE	1
-#define	FALSE	0
-
-/*
- * Parser Information
- */
-typedef
-struct Node
-{
-	Reinst*	first;
-	Reinst*	last;
-}Node;
-
-#define	NSTACK	20
-static	Node	andstack[NSTACK];
-static	Node	*andp;
-static	int	atorstack[NSTACK];
-static	int*	atorp;
-static	int	cursubid;		/* id of current subexpression */
-static	int	subidstack[NSTACK];	/* parallel to atorstack */
-static	int*	subidp;
-static	int	lastwasand;	/* Last token was operand */
-static	int	nbra;
-static	char*	exprp;		/* pointer to next character in source expression */
-static	int	lexdone;
-static	int	nclass;
-static	Reclass*classp;
-static	Reinst*	freep;
-static	int	errors;
-static	wchar_t	yyrune;		/* last lex'd rune */
-static	Reclass*yyclassp;	/* last lex'd class */
-
-/* predeclared crap */
-static	void	operator(int);
-static	void	pushand(Reinst*, Reinst*);
-static	void	pushator(int);
-static	void	evaluntil(int);
-static	int	bldcclass(void);
-
-static jmp_buf regkaboom;
-
-static	void
-rcerror(char *s)
-{
-	errors++;
-	regerror(s);
-	longjmp(regkaboom, 1);
-}
-
-static	Reinst*
-newinst(int t)
-{
-	freep->type = t;
-	freep->l.left = 0;
-	freep->r.right = 0;
-	return freep++;
-}
-
-static	void
-operand(int t)
-{
-	Reinst *i;
-
-	if(lastwasand)
-		operator(CAT);	/* catenate is implicit */
-	i = newinst(t);
-
-	if(t == CCLASS || t == NCCLASS)
-		i->r.cp = yyclassp;
-	if(t == RUNE)
-		i->r.r = yyrune;
-
-	pushand(i, i);
-	lastwasand = TRUE;
-}
-
-static	void
-operator(int t)
-{
-	if(t==RBRA && --nbra<0)
-		rcerror("unmatched right paren");
-	if(t==LBRA){
-		if(++cursubid >= NSUBEXP)
-			rcerror ("too many subexpressions");
-		nbra++;
-		if(lastwasand)
-			operator(CAT);
-	} else
-		evaluntil(t);
-	if(t != RBRA)
-		pushator(t);
-	lastwasand = FALSE;
-	if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
-		lastwasand = TRUE;	/* these look like operands */
-}
-
-static	void
-regerr2(char *s, int c)
-{
-	char buf[100];
-	char *cp = buf;
-	while(*s)
-		*cp++ = *s++;
-	*cp++ = c;
-	*cp = '\0'; 
-	rcerror(buf);
-}
-
-static	void
-cant(char *s)
-{
-	char buf[100];
-	strcpy(buf, "can't happen: ");
-	strcat(buf, s);
-	rcerror(buf);
-}
-
-static	void
-pushand(Reinst *f, Reinst *l)
-{
-	if(andp >= &andstack[NSTACK])
-		cant("operand stack overflow");
-	andp->first = f;
-	andp->last = l;
-	andp++;
-}
-
-static	void
-pushator(int t)
-{
-	if(atorp >= &atorstack[NSTACK])
-		cant("operator stack overflow");
-	*atorp++ = t;
-	*subidp++ = cursubid;
-}
-
-static	Node*
-popand(int op)
-{
-	Reinst *inst;
-
-	if(andp <= &andstack[0]){
-		regerr2("missing operand for ", op);
-		inst = newinst(NOP);
-		pushand(inst,inst);
-	}
-	return --andp;
-}
-
-static	int
-popator(void)
-{
-	if(atorp <= &atorstack[0])
-		cant("operator stack underflow");
-	--subidp;
-	return *--atorp;
-}
-
-static	void
-evaluntil(int pri)
-{
-	Node *op1, *op2;
-	Reinst *inst1, *inst2;
-
-	while(pri==RBRA || atorp[-1]>=pri){
-		switch(popator()){
-		default:
-			rcerror("unknown operator in evaluntil");
-			break;
-		case LBRA:		/* must have been RBRA */
-			op1 = popand('(');
-			inst2 = newinst(RBRA);
-			inst2->r.subid = *subidp;
-			op1->last->l.next = inst2;
-			inst1 = newinst(LBRA);
-			inst1->r.subid = *subidp;
-			inst1->l.next = op1->first;
-			pushand(inst1, inst2);
-			return;
-		case OR:
-			op2 = popand('|');
-			op1 = popand('|');
-			inst2 = newinst(NOP);
-			op2->last->l.next = inst2;
-			op1->last->l.next = inst2;
-			inst1 = newinst(OR);
-			inst1->r.right = op1->first;
-			inst1->l.left = op2->first;
-			pushand(inst1, inst2);
-			break;
-		case CAT:
-			op2 = popand(0);
-			op1 = popand(0);
-			op1->last->l.next = op2->first;
-			pushand(op1->first, op2->last);
-			break;
-		case STAR:
-			op2 = popand('*');
-			inst1 = newinst(OR);
-			op2->last->l.next = inst1;
-			inst1->r.right = op2->first;
-			pushand(inst1, inst1);
-			break;
-		case PLUS:
-			op2 = popand('+');
-			inst1 = newinst(OR);
-			op2->last->l.next = inst1;
-			inst1->r.right = op2->first;
-			pushand(op2->first, inst1);
-			break;
-		case QUEST:
-			op2 = popand('?');
-			inst1 = newinst(OR);
-			inst2 = newinst(NOP);
-			inst1->l.left = inst2;
-			inst1->r.right = op2->first;
-			op2->last->l.next = inst2;
-			pushand(inst1, inst2);
-			break;
-		}
-	}
-}
-
-static	Reprog*
-optimize(Reprog *pp)
-{
-	Reinst *inst, *target;
-	int size;
-	Reprog *npp;
-	int diff;
-
-	/*
-	 *  get rid of NOOP chains
-	 */
-	for(inst=pp->firstinst; inst->type!=END; inst++){
-		target = inst->l.next;
-		while(target->type == NOP)
-			target = target->l.next;
-		inst->l.next = target;
-	}
-
-	/*
-	 *  The original allocation is for an area larger than
-	 *  necessary.  Reallocate to the actual space used
-	 *  and then relocate the code.
-	 */
-	size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
-	npp = realloc(pp, size);
-	if(npp==0 || npp==pp)
-		return pp;
-	diff = (char *)npp - (char *)pp;
-	freep = (Reinst *)((char *)freep + diff);
-	for(inst=npp->firstinst; inst<freep; inst++){
-		switch(inst->type){
-		case OR:
-		case STAR:
-		case PLUS:
-		case QUEST:
-		case CCLASS:
-		case NCCLASS:
-			*(char **)&inst->r.right += diff;
-			break;
-		}
-		*(char **)&inst->l.left += diff;
-	}
-	*(char **)&npp->startinst += diff;
-	return npp;
-}
-
-#ifdef	DEBUG
-static	void
-dumpstack(void){
-	Node *stk;
-	int *ip;
-
-	print("operators\n");
-	for(ip=atorstack; ip<atorp; ip++)
-		print("0%o\n", *ip);
-	print("operands\n");
-	for(stk=andstack; stk<andp; stk++)
-		print("0%o\t0%o\n", stk->first->type, stk->last->type);
-}
-
-static	void
-dump(Reprog *pp)
-{
-	Reinst *l;
-	wchar_t *p;
-
-	l = pp->firstinst;
-	do{
-		print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
-			l->l.left-pp->firstinst, l->l.right-pp->firstinst);
-		if(l->type == RUNE)
-			print("\t%C\n", l->r);
-		else if(l->type == CCLASS || l->type == NCCLASS){
-			print("\t[");
-			if(l->type == NCCLASS)
-				print("^");
-			for(p = l->r.cp->spans; p < l->r.cp->end; p += 2)
-				if(p[0] == p[1])
-					print("%C", p[0]);
-				else
-					print("%C-%C", p[0], p[1]);
-			print("]\n");
-		} else
-			print("\n");
-	}while(l++->type);
-}
-#endif
-
-static	Reclass*
-newclass(void)
-{
-	if(nclass >= NCLASS)
-		regerr2("too many character classes; limit", NCLASS+'0');
-	return &(classp[nclass++]);
-}
-
-static	int
-nextc(wchar_t *rp)
-{
-	int n;
-
-	if(lexdone){
-		*rp = 0;
-		return 1;
-	}
-	n = mbtowc(rp, exprp, MB_CUR_MAX);
-	if (n <= 0)
-		n = 1;
-	exprp += n;
-	if(*rp == L'\\'){
-		n = mbtowc(rp, exprp, MB_CUR_MAX);
-		if (n <= 0)
-			n = 1;
-		exprp += n;
-		return 1;
-	}
-	if(*rp == 0)
-		lexdone = 1;
-	return 0;
-}
-
-static	int
-lex(int literal, int dot_type)
-{
-	int quoted;
-
-	quoted = nextc(&yyrune);
-	if(literal || quoted){
-		if(yyrune == 0)
-			return END;
-		return RUNE;
-	}
-
-	switch(yyrune){
-	case 0:
-		return END;
-	case L'*':
-		return STAR;
-	case L'?':
-		return QUEST;
-	case L'+':
-		return PLUS;
-	case L'|':
-		return OR;
-	case L'.':
-		return dot_type;
-	case L'(':
-		return LBRA;
-	case L')':
-		return RBRA;
-	case L'^':
-		return BOL;
-	case L'$':
-		return EOL;
-	case L'[':
-		return bldcclass();
-	}
-	return RUNE;
-}
-
-static int
-bldcclass(void)
-{
-	int type;
-	wchar_t r[NCCRUNE];
-	wchar_t *p, *ep, *np;
-	wchar_t rune;
-	int quoted;
-
-	/* we have already seen the '[' */
-	type = CCLASS;
-	yyclassp = newclass();
-
-	/* look ahead for negation */
-	ep = r;
-	quoted = nextc(&rune);
-	if(!quoted && rune == L'^'){
-		type = NCCLASS;
-		quoted = nextc(&rune);
-		*ep++ = L'\n';
-		*ep++ = L'\n';
-	}
-
-	/* parse class into a set of spans */
-	for(; ep<&r[NCCRUNE];){
-		if(rune == 0){
-			rcerror("malformed '[]'");
-			return 0;
-		}
-		if(!quoted && rune == L']')
-			break;
-		if(!quoted && rune == L'-'){
-			if(ep == r){
-				rcerror("malformed '[]'");
-				return 0;
-			}
-			quoted = nextc(&rune);
-			if((!quoted && rune == L']') || rune == 0){
-				rcerror("malformed '[]'");
-				return 0;
-			}
-			*(ep-1) = rune;
-		} else {
-			*ep++ = rune;
-			*ep++ = rune;
-		}
-		quoted = nextc(&rune);
-	}
-
-	/* sort on span start */
-	for(p = r; p < ep; p += 2){
-		for(np = p; np < ep; np += 2)
-			if(*np < *p){
-				rune = np[0];
-				np[0] = p[0];
-				p[0] = rune;
-				rune = np[1];
-				np[1] = p[1];
-				p[1] = rune;
-			}
-	}
-
-	/* merge spans */
-	np = yyclassp->spans;
-	p = r;
-	if(r == ep)
-		yyclassp->end = np;
-	else {
-		np[0] = *p++;
-		np[1] = *p++;
-		for(; p < ep; p += 2)
-			if(p[0] <= np[1]){
-				if(p[1] > np[1])
-					np[1] = p[1];
-			} else {
-				np += 2;
-				np[0] = p[0];
-				np[1] = p[1];
-			}
-		yyclassp->end = np+2;
-	}
-
-	return type;
-}
-
-static	Reprog*
-regcomp1(char *s, int literal, int dot_type)
-{
-	int token;
-	Reprog *pp;
-
-	/* get memory for the program */
-	pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
-	if(pp == 0){
-		regerror("out of memory");
-		return 0;
-	}
-	freep = pp->firstinst;
-	classp = pp->class;
-	errors = 0;
-
-	if(setjmp(regkaboom))
-		goto out;
-
-	/* go compile the sucker */
-	lexdone = 0;
-	exprp = s;
-	nclass = 0;
-	nbra = 0;
-	atorp = atorstack;
-	andp = andstack;
-	subidp = subidstack;
-	lastwasand = FALSE;
-	cursubid = 0;
-
-	/* Start with a low priority operator to prime parser */
-	pushator(START-1);
-	while((token = lex(literal, dot_type)) != END){
-		if((token&0300) == OPERATOR)
-			operator(token);
-		else
-			operand(token);
-	}
-
-	/* Close with a low priority operator */
-	evaluntil(START);
-
-	/* Force END */
-	operand(END);
-	evaluntil(START);
-#ifdef DEBUG
-	dumpstack();
-#endif
-	if(nbra)
-		rcerror("unmatched left paren");
-	--andp;	/* points to first and only operand */
-	pp->startinst = andp->first;
-#ifdef DEBUG
-	dump(pp);
-#endif
-	pp = optimize(pp);
-#ifdef DEBUG
-	print("start: %d\n", andp->first-pp->firstinst);
-	dump(pp);
-#endif
-out:
-	if(errors){
-		free(pp);
-		pp = 0;
-	}
-	return pp;
-}
-
-extern	Reprog*
-regcomp(char *s)
-{
-	return regcomp1(s, 0, ANY);
-}
-
-extern	Reprog*
-regcomplit(char *s)
-{
-	return regcomp1(s, 1, ANY);
-}
-
-extern	Reprog*
-regcompnl(char *s)
-{
-	return regcomp1(s, 0, ANYNL);
-}
--- a/sys/src/ape/lib/regexp/regcomp.h
+++ /dev/null
@@ -1,61 +1,0 @@
-/*
- *  substitution list
- */
-typedef struct Resublist	Resublist;
-struct	Resublist
-{
-	Resub	m[32];
-};
-
-/* max subexpressions per program */
-Resublist ReSuBlIsT;
-#define NSUBEXP (sizeof(ReSuBlIsT.m)/sizeof(Resub))
-
-/* max character classes per program */
-Reprog	RePrOg;
-#define	NCLASS	(sizeof(RePrOg.class)/sizeof(Reclass))
-
-/* max rune ranges per character class */
-#define NCCRUNE	(sizeof(Reclass)/sizeof(wchar_t))
-
-/*
- * Actions and Tokens (Reinst types)
- *
- *	02xx are operators, value == precedence
- *	03xx are tokens, i.e. operands for operators
- */
-#define RUNE		0177
-#define	OPERATOR	0200	/* Bitmask of all operators */
-#define	START		0200	/* Start, used for marker on stack */
-#define	RBRA		0201	/* Right bracket, ) */
-#define	LBRA		0202	/* Left bracket, ( */
-#define	OR		0203	/* Alternation, | */
-#define	CAT		0204	/* Concatentation, implicit operator */
-#define	STAR		0205	/* Closure, * */
-#define	PLUS		0206	/* a+ == aa* */
-#define	QUEST		0207	/* a? == a|nothing, i.e. 0 or 1 a's */
-#define	ANY		0300	/* Any character except newline, . */
-#define	ANYNL		0301	/* Any character including newline, . */
-#define	NOP		0302	/* No operation, internal use only */
-#define	BOL		0303	/* Beginning of line, ^ */
-#define	EOL		0304	/* End of line, $ */
-#define	CCLASS		0305	/* Character class, [] */
-#define	NCCLASS		0306	/* Negated character class, [] */
-#define	END		0377	/* Terminate: match found */
-
-/*
- *  regexec execution lists
- */
-#define LISTINCREMENT 8
-typedef struct Relist	Relist;
-struct Relist
-{
-	Reinst		*inst;		/* Reinstruction of the thread */
-	Resublist	se;		/* matched subexpressions in this thread */
-};
-extern Relist*	_relist[2];
-extern Relist*	_reliste[2];
-extern int	_relistsize;
-
-extern Relist*	_renewthread(Relist*, Reinst*, Resublist*);
-extern void	_renewmatch(Resub*, int, Resublist*);
--- a/sys/src/ape/lib/regexp/regerror.c
+++ /dev/null
@@ -1,16 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include "regexp.h"
-
-void
-regerror(char *s)
-{
-	char buf[132];
-
-	strcpy(buf, "regerror: ");
-	strcat(buf, s);
-	strcat(buf, "\n");
-	fwrite(buf, 1, strlen(buf), stderr);
-	exit(1);
-}
--- a/sys/src/ape/lib/regexp/regexec.c
+++ /dev/null
@@ -1,191 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-static Resublist sempty;		/* empty set of matches */
-
-/*
- *  return	0 if no match
- *		>0 if a match
- *		<0 if we ran out of _relist space
- */
-static int
-regexec1(Reprog *progp,	/* program to run */
-	char *bol,	/* string to run machine on */
-	Resub *mp,	/* subexpression elements */
-	int ms,		/* number of elements at mp */
-	char *starts,
-	char *eol,
-	wchar_t startchar)
-{
-	int flag=0;
-	Reinst *inst;
-	Relist *tlp;
-	char *s;
-	int i, checkstart;
-	wchar_t r, *rp, *ep;
-	int n;
-	Relist* tl;		/* This list, next list */
-	Relist* nl;
-	Relist* tle;		/* ends of this and next list */
-	Relist* nle;
-	int match;
-
-	match = 0;
-	checkstart = startchar;
-	sempty.m[0].s.sp = 0;
-	if(mp!=0)
-		for(i=0; i<ms; i++)
-			mp[i].s.sp = mp[i].e.ep = 0;
-	_relist[0][0].inst = _relist[1][0].inst = 0;
-
-	/* Execute machine once for each character, including terminal NUL */
-	s = starts;
-	do{
-		/* fast check for first char */
-		r = *(unsigned char*)s;
-		if(checkstart && r != startchar){
-			s++;
-			continue;
-		}
-
-		if(r < Runeself)
-			n = 1;
-		else {
-			n = mbtowc(&r, s, MB_CUR_MAX);
-			if (n <= 0)
-				n = 1;
-		}
-
-		/* switch run lists */
-		tl = _relist[flag];
-		tle = _reliste[flag];
-		nl = _relist[flag^=1];
-		nle = _reliste[flag];
-		nl->inst = 0;
-
-		/* Add first instruction to current list */
-		if(match == 0){
-			sempty.m[0].s.sp = s;
-			_renewthread(tl, progp->startinst, &sempty);
-		}
-
-		/* Execute machine until current list is empty */
-		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
-			if(s == eol)
-				break;
-
-			for(inst = tlp->inst; ; inst = inst->l.next){
-				switch(inst->type){
-				case RUNE:	/* regular character */
-					if(inst->r.r == r)
-						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-							return -1;
-					break;
-				case LBRA:
-					tlp->se.m[inst->r.subid].s.sp = s;
-					continue;
-				case RBRA:
-					tlp->se.m[inst->r.subid].e.ep = s;
-					continue;
-				case ANY:
-					if(r != '\n')
-						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-							return -1;
-					break;
-				case ANYNL:
-					if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-						return -1;
-					break;
-				case BOL:
-					if(s == bol || *(s-1) == '\n')
-						continue;
-					break;
-				case EOL:
-					if(r == 0 || r == '\n')
-						continue;
-					break;
-				case CCLASS:
-					ep = inst->r.cp->end;
-					for(rp = inst->r.cp->spans; rp < ep; rp += 2)
-						if(r >= rp[0] && r <= rp[1]){
-							if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-								return -1;
-							break;
-						}
-					break;
-				case NCCLASS:
-					ep = inst->r.cp->end;
-					for(rp = inst->r.cp->spans; rp < ep; rp += 2)
-						if(r >= rp[0] && r <= rp[1])
-							break;
-					if(rp == ep)
-						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-							return -1;
-					break;
-				case OR:
-					/* evaluate right choice later */
-					if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
-						return -1;
-					/* efficiency: advance and re-evaluate */
-					continue;
-				case END:	/* Match! */
-					match = 1;
-					tlp->se.m[0].e.ep = s;
-					if(mp != 0)
-						_renewmatch(mp, ms, &tlp->se);
-					break;
-				}
-				break;
-			}
-		}
-		checkstart = startchar && nl->inst==0;
-		s += n;
-	}while(r);
-	return match;
-}
-
-extern int
-regexec(Reprog *progp,	/* program to run */
-	char *bol,	/* string to run machine on */
-	Resub *mp,	/* subexpression elements */
-	int ms)		/* number of elements at mp */
-{
-	char *starts;	/* where to start match */
-	char *eol;	/* where to end match */
-	wchar_t startchar;
-	int rv;
-
-	/*
- 	 *  use user-specified starting/ending location if specified
-	 */
-	starts = bol;
-	eol = 0;
-	if(mp && ms>0){
-		if(mp->s.sp)
-			starts = mp->s.sp;
-		if(mp->e.ep)
-			eol = mp->e.ep;
-	}
-	startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself)
-		? progp->startinst->r.r : 0;
-
-	/* keep trying till we have enough list space to terminate */
-	for(;;){
-		if(_relist[0] == 0){
-			_relist[0] = malloc(2*_relistsize*sizeof(Relist));
-			_relist[1] = _relist[0] + _relistsize;
-			_reliste[0] = _relist[0] + _relistsize - 1;
-			_reliste[1] = _relist[1] + _relistsize - 1;
-			if(_relist[0] == 0)
-				regerror("_relist overflow");
-		}
-		rv = regexec1(progp, bol, mp, ms, starts, eol, startchar);
-		if(rv >= 0)
-			return rv;
-		free(_relist[0]);
-		_relist[0] = 0;
-		_relistsize += LISTINCREMENT;
-	}
-}
--- a/sys/src/ape/lib/regexp/regsub.c
+++ /dev/null
@@ -1,64 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-
-/* substitute into one string using the matches from the last regexec() */
-extern	void
-regsub(char *sp,	/* source string */
-	char *dp,	/* destination string */
-	int dlen,
-	Resub *mp,	/* subexpression elements */
-	int ms)		/* number of elements pointed to by mp */
-{
-	char *ssp, *ep;
-	int i;
-
-	ep = dp+dlen-1;
-	while(*sp != '\0'){
-		if(*sp == '\\'){
-			switch(*++sp){
-			case '0':
-			case '1':
-			case '2':
-			case '3':
-			case '4':
-			case '5':
-			case '6':
-			case '7':
-			case '8':
-			case '9':
-				i = *sp-'0';
-				if(mp[i].s.sp != 0 && mp!=0 && ms>i)
-					for(ssp = mp[i].s.sp;
-					     ssp < mp[i].e.ep;
-					     ssp++)
-						if(dp < ep)
-							*dp++ = *ssp;
-				break;
-			case '\\':
-				if(dp < ep)
-					*dp++ = '\\';
-				break;
-			case '\0':
-				sp--;
-				break;
-			default:
-				if(dp < ep)
-					*dp++ = *sp;
-				break;
-			}
-		}else if(*sp == '&'){				
-			if(mp[0].s.sp != 0 && mp!=0 && ms>0)
-			if(mp[0].s.sp != 0)
-				for(ssp = mp[0].s.sp;
-				     ssp < mp[0].e.ep; ssp++)
-					if(dp < ep)
-						*dp++ = *ssp;
-		}else{
-			if(dp < ep)
-				*dp++ = *sp;
-		}
-		sp++;
-	}
-	*dp = '\0';
-}
--- a/sys/src/ape/lib/regexp/rregexec.c
+++ /dev/null
@@ -1,181 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-static Resublist sempty;		/* empty set of matches */
-
-/*
- *  return	0 if no match
- *		>0 if a match
- *		<0 if we ran out of _relist space
- */
-static int
-rregexec1(Reprog *progp,	/* program to run */
-	wchar_t *bol,		/* string to run machine on */
-	Resub *mp,		/* subexpression elements */
-	int ms,			/* number of elements at mp */
-	wchar_t *starts,
-	wchar_t *eol,
-	wchar_t startchar)
-{
-	int flag=0;
-	Reinst *inst;
-	Relist *tlp;
-	wchar_t *s;
-	int i, checkstart;
-	wchar_t r, *rp, *ep;
-	int n;
-	Relist* tl;		/* This list, next list */
-	Relist* nl;
-	Relist* tle;		/* ends of this and next list */
-	Relist* nle;
-	int match;
-
-	match = 0;
-	checkstart = startchar;
-	sempty.m[0].s.rsp = 0;
-	if(mp!=0)
-		for(i=0; i<ms; i++)
-			mp[i].s.rsp = mp[i].e.rep = 0;
-	_relist[0][0].inst = _relist[1][0].inst = 0;
-
-	/* Execute machine once for each character, including terminal NUL */
-	s = starts;
-	do{
-		r = *s;
-
-		/* fast check for first char */
-		if(checkstart && r!=startchar){
-			s++;
-			continue;
-		}
-
-		/* switch run lists */
-		tl = _relist[flag];
-		tle = _reliste[flag];
-		nl = _relist[flag^=1];
-		nle = _reliste[flag];
-		nl->inst = 0;
-
-		/* Add first instruction to current list */
-		sempty.m[0].s.rsp = s;
-		_renewthread(tl, progp->startinst, &sempty);
-
-		/* Execute machine until current list is empty */
-		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
-			if(s == eol)
-				break;
-
-			for(inst=tlp->inst; ; inst = inst->l.next){
-				switch(inst->type){
-				case RUNE:	/* regular character */
-					if(inst->r.r == r)
-						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-							return -1;
-					break;
-				case LBRA:
-					tlp->se.m[inst->r.subid].s.rsp = s;
-					continue;
-				case RBRA:
-					tlp->se.m[inst->r.subid].e.rep = s;
-					continue;
-				case ANY:
-					if(r != '\n')
-						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-							return -1;
-					break;
-				case ANYNL:
-					if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-							return -1;
-					break;
-				case BOL:
-					if(s == bol || *(s-1) == '\n')
-						continue;
-					break;
-				case EOL:
-					if(r == 0 || r == '\n')
-						continue;
-					break;
-				case CCLASS:
-					ep = inst->r.cp->end;
-					for(rp = inst->r.cp->spans; rp < ep; rp += 2)
-						if(r >= rp[0] && r <= rp[1]){
-							if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-								return -1;
-							break;
-						}
-					break;
-				case NCCLASS:
-					ep = inst->r.cp->end;
-					for(rp = inst->r.cp->spans; rp < ep; rp += 2)
-						if(r >= rp[0] && r <= rp[1])
-							break;
-					if(rp == ep)
-						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-							return -1;
-					break;
-				case OR:
-					/* evaluate right choice later */
-					if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
-						return -1;
-					/* efficiency: advance and re-evaluate */
-					continue;
-				case END:	/* Match! */
-					match = 1;
-					tlp->se.m[0].e.rep = s;
-					if(mp != 0)
-						_renewmatch(mp, ms, &tlp->se);
-					break;
-				}
-				break;
-			}
-		}
-		checkstart = startchar && nl->inst==0;
-		s++;
-	}while(r);
-	return match;
-}
-
-extern int
-rregexec(Reprog *progp,	/* program to run */
-	wchar_t *bol,	/* string to run machine on */
-	Resub *mp,	/* subexpression elements */
-	int ms)		/* number of elements at mp */
-{
-	wchar_t *starts;	/* where to start match */
-	wchar_t *eol;	/* where to end match */
-	wchar_t startchar;
-	int rv;
-
-	/*
- 	 *  use user-specified starting/ending location if specified
-	 */
-	starts = bol;
-	eol = 0;
-	if(mp && ms>0){
-		if(mp->s.rsp)
-			starts = mp->s.rsp;
-		if(mp->e.rep)
-			eol = mp->e.rep;
-	}
-	startchar = progp->startinst->type == RUNE ? progp->startinst->r.r : 0;
-
-	/* keep trying till we have enough list space to terminate */
-	for(;;){
-		if(_relist[0] == 0){
-			_relist[0] = malloc(2*_relistsize*sizeof(Relist));
-			_relist[1] = _relist[0] + _relistsize;
-			_reliste[0] = _relist[0] + _relistsize - 1;
-			_reliste[1] = _relist[1] + _relistsize - 1;
-			if(_relist[0] == 0)
-				regerror("_relist overflow");
-		}
-		rv = rregexec1(progp, bol, mp, ms, starts, eol, startchar);
-		if(rv >= 0)
-			return rv;
-		free(_relist[0]);
-		_relist[0] = 0;
-		_relistsize += LISTINCREMENT;
-	}
-}
--- a/sys/src/ape/lib/regexp/rregsub.c
+++ /dev/null
@@ -1,64 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-
-/* substitute into one string using the matches from the last regexec() */
-extern	void
-rregsub(wchar_t *sp,	/* source string */
-	wchar_t *dp,	/* destination string */
-	int dlen,
-	Resub *mp,	/* subexpression elements */
-	int ms)		/* number of elements pointed to by mp */
-{
-	wchar_t *ssp, *ep;
-	int i;
-
-	ep = dp+(dlen/sizeof(wchar_t))-1;
-	while(*sp != '\0'){
-		if(*sp == '\\'){
-			switch(*++sp){
-			case '0':
-			case '1':
-			case '2':
-			case '3':
-			case '4':
-			case '5':
-			case '6':
-			case '7':
-			case '8':
-			case '9':
-				i = *sp-'0';
-				if(mp[i].s.rsp != 0 && mp!=0 && ms>i)
-					for(ssp = mp[i].s.rsp;
-					     ssp < mp[i].e.rep;
-					     ssp++)
-						if(dp < ep)
-							*dp++ = *ssp;
-				break;
-			case '\\':
-				if(dp < ep)
-					*dp++ = '\\';
-				break;
-			case '\0':
-				sp--;
-				break;
-			default:
-				if(dp < ep)
-					*dp++ = *sp;
-				break;
-			}
-		}else if(*sp == '&'){				
-			if(mp[0].s.rsp != 0 && mp!=0 && ms>0)
-			if(mp[0].s.rsp != 0)
-				for(ssp = mp[0].s.rsp;
-				     ssp < mp[0].e.rep; ssp++)
-					if(dp < ep)
-						*dp++ = *ssp;
-		}else{
-			if(dp < ep)
-				*dp++ = *sp;
-		}
-		sp++;
-	}
-	*dp = '\0';
-}
--- /dev/null
+++ b/sys/src/cmd/awk/popen.c
@@ -1,0 +1,123 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "awk.h"
+
+/*
+ * popen/pclose for the native awk port: run a command under rc
+ * with one end of a pipe wrapped in a Biobuf for the caller.
+ */
+
+#define MAXFORKS	20	/* size of the child table */
+#define NSYSFILE	3	/* child keeps fds below this open */
+/* picks a when writing to the child, b when reading; uses the local "mode" */
+#define	tst(a,b)	(mode == OREAD? (b) : (a))
+#define	RDR	0
+#define	WTR	1
+
+/* one slot per child started by popen; pid == 0 marks a free slot */
+struct a_fork {
+	short	done;		/* wait message already recorded in status */
+	short	fd;		/* parent's end of the pipe */
+	int	pid;
+	char status[128];	/* exit message; empty is reported as success */
+};
+static struct a_fork the_fork[MAXFORKS];
+
+/*
+ * Run cmd via /bin/rc -c with a pipe to the caller.  With mode OREAD
+ * the child's fd 1 feeds the returned Biobuf; with any other mode the
+ * Biobuf feeds the child's fd 0.  Returns nil when the table is full
+ * or pipe, fork or Bfdopen fails.
+ */
+Biobuf*
+popen(char *cmd, int mode)
+{
+	int p[2];
+	int myside, hisside, pid;
+	int i, ind;
+	Biobuf *bp;
+
+	for (ind = 0; ind < MAXFORKS; ind++)
+		if (the_fork[ind].pid == 0)
+			break;
+	if (ind == MAXFORKS)
+		return nil;
+	if(pipe(p) < 0)
+		return nil;
+	myside = tst(p[WTR], p[RDR]);
+	hisside = tst(p[RDR], p[WTR]);
+	switch (pid = fork()) {
+	case -1:
+		/* no child exists: release both pipe ends instead of leaking them */
+		close(myside);
+		close(hisside);
+		return nil;
+	case 0:
+		/* myside and hisside reverse roles in child */
+		close(myside);
+		dup(hisside, tst(0, 1));
+		for (i=NSYSFILE; i<FOPEN_MAX; i++)
+			close(i);
+		execl("/bin/rc", "rc", "-c", cmd, nil);
+		exits("exec failed");
+	default:
+		close(hisside);
+		bp = Bfdopen(myside, mode);
+		if(bp == nil){
+			/* nothing for pclose to find later, so do not claim the slot */
+			close(myside);
+			return nil;
+		}
+		the_fork[ind].pid = pid;
+		the_fork[ind].fd = myside;
+		the_fork[ind].done = 0;
+		return bp;
+	}
+}
+
+/*
+ * Close a stream returned by popen and collect its child's wait
+ * message.  Returns -1 if ptr's fd is not in the table, 1 if the
+ * recorded exit message is non-empty, 0 otherwise.
+ */
+int
+pclose(Biobuf *ptr)
+{
+	int f, r, ind;
+	Waitmsg *status;
+	struct a_fork *fp;
+
+	f = Bfildes(ptr);
+	Bterm(ptr);
+	for (ind = 0; ind < MAXFORKS; ind++)
+		if (the_fork[ind].fd == f && the_fork[ind].pid != 0)
+			break;
+	if (ind == MAXFORKS)
+		return -1;
+	if (!the_fork[ind].done) {
+		do {
+			if((status = wait()) == nil)
+				r = -1;
+			else
+				r = status->pid;
+			/* wait may report a different child; file its message for that child's pclose */
+			for (f = 0; f < MAXFORKS; f++) {
+				fp = &the_fork[f];
+				if (r == fp->pid) {
+					fp->done = 1;
+					/* end pointer must be the real end of status[] */
+					strecpy(fp->status, fp->status + sizeof fp->status, status->msg);
+					break;
+				}
+			}
+			free(status);
+		} while(r != the_fork[ind].pid && r != -1);
+		if(r == -1)
+			strcpy(the_fork[ind].status, "No loved ones to wait for");
+	}
+	the_fork[ind].pid = 0;
+	if(the_fork[ind].status[0] != '\0')
+		return 1;
+	return 0;
+}
--- a/sys/src/libregexp/regcomp.c
+++ b/sys/src/libregexp/regcomp.c
@@ -184,12 +184,10 @@
 		return nil;
 	}
 
+	maxthr = regstrlen;
 	parsetr = node(&plex, TSUB, e0(&plex), nil);
-	maxthr = maxthreads(parsetr);
-	if(maxthr == -1)
-		maxthr = regstrlen;
 
-	prtree(parsetr, 0, 1);
+//	prtree(parsetr, 0, 1);
 	reprog = malloc(sizeof(Reprog) +
 	                sizeof(Reinst) * plex.instrs +
 	                sizeof(Rethread) * maxthr +