shithub: riscv

Download patch

ref: 9ae083d81642be3a3ee7ff1e8d28fb9381bb1abf
parent: f94167ebeebc108f393b3b2ec279cee2afae56e7
author: spew <devnull@localhost>
date: Thu Feb 2 16:21:34 EST 2017

libregexp: simplify regular expression vm implementation
Make the logic around who has priority over the final
match simpler by merging the priority generation and
match fields in a smarter way. Move the creation of
new thread matches up to the top to avoid jumping all
over the place.

--- a/sys/src/libregexp/regcomp.c
+++ b/sys/src/libregexp/regcomp.c
@@ -156,17 +156,6 @@
 	return plex;
 }
 
-static int
-maxthreads(Renode *tree)
-{
-	tree = tree->left;
-	if(tree->op == TCAT)
-		tree = tree->left;
-	if(tree->op == TBOL)
-		return 2;
-	return -1;
-}
-
 static Reprog*
 regcomp1(char *regstr, int nl, int lit)
 {
@@ -187,7 +176,7 @@
 		return nil;
 	}
 
-	maxthr = regstrlen;
+	maxthr = regstrlen + 1;
 	parsetr = node(&plex, TSUB, e0(&plex), nil);
 
 //	prtree(parsetr, 0, 1);
@@ -304,12 +293,13 @@
 static Reinst*
 compile(Renode *parsetr, Reprog *reprog, int nl)
 {
-	Reinst *reinst;
+	Reinst *reinst, *end;
 	int sub;
 
 	sub = 0;
 	reinst = (Reinst*)(reprog+1);
-	compile1(parsetr, reinst, &sub, nl);
+	end = compile1(parsetr, reinst, &sub, nl);
+	assert(reinst + reprog->len == end);
 	return reinst;
 }
 
--- a/sys/src/libregexp/regexec.c
+++ b/sys/src/libregexp/regexec.c
@@ -4,30 +4,31 @@
 #include "regimpl.h"
 
 typedef struct RethreadQ RethreadQ;
-struct RethreadQ
-{
+struct RethreadQ {
 	Rethread *head;
 	Rethread **tail;
 };
 
 int
-regexec(Reprog *prog, char *str, Resub *sem, int msize)
+regexec(Reprog *p, char *str, Resub *sem, int msize)
 {
 	RethreadQ lists[2], *clist, *nlist, *tmp;
-	Rethread *t, *next, *pooltop, *avail;
-	Reinst *curinst;
+	Rethread *t, *next, *pool, *avail;
+	Reinst *ci;
 	Rune r;
 	char *sp, *ep, endc;
-	int i, match, first, gen, matchpri, pri;
+	int i, matchgen, gen;
 
 	if(msize > NSUBEXPM)
 		msize = NSUBEXPM;
 
-	if(prog->startinst->gen != 0) {
-		for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
-			curinst->gen = 0;
+	if(p->startinst->gen != 0) {
+		for(ci = p->startinst; ci < p->startinst + p->len; ci++)
+			ci->gen = 0;
 	}
 
+	memset(p->threads, 0, sizeof(Rethread)*p->nthr);
+
 	clist = lists;
 	clist->head = nil;
 	clist->tail = &clist->head;
@@ -35,10 +36,10 @@
 	nlist->head = nil;
 	nlist->tail = &nlist->head;
 
-	pooltop = prog->threads + prog->nthr;
+	pool = p->threads;
 	avail = nil;
 
-	pri = matchpri = gen = match = 0;
+	gen = matchgen = 0;
 	sp = str;
 	ep = nil;
 	endc = '\0';
@@ -51,26 +52,40 @@
 			*sem->ep = '\0';
 		}
 	}
-	r = Runemax + 1; 
-	for(; r != L'\0'; sp += i) {
-		gen++;
+	for(r = 1; r != L'\0'; sp += i) {
 		i = chartorune(&r, sp);
-		first = 1;
+		gen++;
+		if(matchgen == 0) {
+			if(avail == nil) {
+				assert(pool < p->threads + p->nthr);
+				t = pool++;
+			} else {
+				t = avail;
+				avail = avail->next;
+			}
+			t->i = p->startinst;
+			if(msize > 0)
+				memset(t->sem, 0, sizeof(Resub)*msize);
+			t->next = nil;
+			t->gen = gen;
+			*clist->tail = t;
+			clist->tail = &t->next;
+		}
 		t = clist->head;
 		if(t == nil)
-			goto Start;
-		curinst = t->pc;
+			break;
+		ci = t->i;
 Again:
-		if(curinst->gen == gen)
+		if(ci->gen == gen || matchgen && t->gen > matchgen)
 			goto Done;
-		curinst->gen = gen;
-		switch(curinst->op) {
+		ci->gen = gen;
+		switch(ci->op) {
 		case ORUNE:
-			if(r != curinst->r)
+			if(r != ci->r)
 				goto Done;
 		case OANY: /* fallthrough */
 			next = t->next;
-			t->pc = curinst + 1;
+			t->i = ci + 1;
 			t->next = nil;
 			*nlist->tail = t;
 			nlist->tail = &t->next;
@@ -77,18 +92,18 @@
 			if(next == nil)
 				break;
 			t = next;
-			curinst = t->pc;
+			ci = t->i;
 			goto Again;
 		case OCLASS:
 		Class:
-			if(r < curinst->r)
+			if(r < ci->r)
 				goto Done;
-			if(r > curinst->r1) {
-				curinst++;
+			if(r > ci->r1) {
+				ci++;
 				goto Class;
 			}
 			next = t->next;
-			t->pc = curinst->a;
+			t->i = ci->a;
 			t->next = nil;
 			*nlist->tail = t;
 			nlist->tail = &t->next;
@@ -95,56 +110,53 @@
 			if(next == nil)
 				break;
 			t = next;
-			curinst = t->pc;
+			ci = t->i;
 			goto Again;
 		case ONOTNL:
 			if(r != L'\n') {
-				curinst++;
+				ci++;
 				goto Again;
 			}
 			goto Done;
 		case OBOL:
 			if(sp == str || sp[-1] == '\n') {
-				curinst++;
+				ci++;
 				goto Again;
 			}
 			goto Done;
 		case OEOL:
 			if(r == L'\n' || r == L'\0' && ep == nil) {
-				curinst++;
+				ci++;
 				goto Again;
 			}
 			goto Done;
 		case OJMP:
-			curinst = curinst->a;
+			ci = ci->a;
 			goto Again;
 		case OSPLIT:
-			if(avail == nil)
-				next = --pooltop;
-			else {
+			if(avail == nil) {
+				assert(pool < p->threads + p->nthr);
+				next = pool++;
+			} else {
 				next = avail;
 				avail = avail->next;
 			}
-			next->pc = curinst->b;
+			next->i = ci->b;
 			if(msize > 0)
 				memcpy(next->sem, t->sem, sizeof(Resub)*msize);
-			next->pri = t->pri;
 			next->next = t->next;
+			next->gen = t->gen;
 			t->next = next;
-			curinst = curinst->a;
+			ci = ci->a;
 			goto Again;
 		case OSAVE:
-			if(curinst->sub < msize)
-				t->sem[curinst->sub].sp = sp;
-			curinst++;
+			if(ci->sub < msize)
+				t->sem[ci->sub].sp = sp;
+			ci++;
 			goto Again;
 		case OUNSAVE:
-			if(curinst->sub == 0) {
-				/* "Highest" priority is the left-most longest. */
-				if (t->pri > matchpri)
-					goto Done;
-				match = 1;
-				matchpri = t->pri;
+			if(ci->sub == 0) {
+				matchgen = t->gen;
 				if(sem != nil && msize > 0) {
 					memcpy(sem, t->sem, sizeof(Resub)*msize);
 					sem->ep = sp;
@@ -151,9 +163,9 @@
 				}
 				goto Done;
 			}
-			if(curinst->sub < msize)
-				t->sem[curinst->sub].ep = sp;
-			curinst++;
+			if(ci->sub < msize)
+				t->sem[ci->sub].ep = sp;
+			ci++;
 			goto Again;
 		Done:
 			next = t->next;
@@ -162,30 +174,9 @@
 			if(next == nil)
 				break;
 			t = next;
-			curinst = t->pc;
+			ci = t->i;
 			goto Again;
 		}
-Start:
-		/* Start again once if we haven't found anything. */
-		if(first == 1 && match == 0) {
-			first = 0;
-			if(avail == nil)
-				t = --pooltop;
-			else {
-				t = avail;
-				avail = avail->next;
-			}
-			if(msize > 0)
-				memset(t->sem, 0, sizeof(Resub)*msize);
-			/* "Lower" priority thread */
-			t->pri = matchpri = pri++;
-			t->next = nil;
-			curinst = prog->startinst;
-			goto Again;
-		}
-		/* If we have a match and no extant threads, we are done. */
-		if(match == 1 && nlist->head == nil)
-			break;
 		tmp = clist;
 		clist = nlist;
 		nlist = tmp;
@@ -194,5 +185,5 @@
 	}
 	if(ep != nil)
 		*ep = endc;
-	return match;
+	return matchgen > 0 ? 1 : 0;
 }
--- a/sys/src/libregexp/regimpl.h
+++ b/sys/src/libregexp/regimpl.h
@@ -1,5 +1,4 @@
-enum
-{
+enum {
 	LANY = 0,
 	LBOL,
 	LCLASS,
@@ -30,8 +29,7 @@
 typedef struct Parselex Parselex;
 typedef struct Renode Renode;
 
-struct Parselex
-{
+struct Parselex {
 	/* Parse */
 	Renode *next;
 	Renode *nodes;
@@ -50,8 +48,8 @@
 	Rune cpairs[400+2];
 	int nc;
 };
-struct Renode
-{
+
+struct Renode {
 	int op;
 	Renode *left;
 	Rune r;
@@ -63,15 +61,15 @@
 	};
 	int nclass;
 };
-struct Rethread
-{
-	Reinst *pc;
+
+struct Rethread {
+	Reinst *i;
 	Resub sem[NSUBEXPM];
-	int pri;
 	Rethread *next;
+	int gen;
 };
-struct Reinst
-{
+
+struct Reinst {
 	char op;
 	int gen;
 	Reinst *a;
--- a/sys/src/libregexp/rregexec.c
+++ b/sys/src/libregexp/rregexec.c
@@ -4,29 +4,30 @@
 #include "regimpl.h"
 
 typedef struct RethreadQ RethreadQ;
-struct RethreadQ
-{
+struct RethreadQ {
 	Rethread *head;
 	Rethread **tail;
 };
 
 int
-rregexec(Reprog *prog, Rune *str, Resub *sem, int msize)
+rregexec(Reprog *p, Rune *str, Resub *sem, int msize)
 {
 	RethreadQ lists[2], *clist, *nlist, *tmp;
-	Rethread *t, *next, *pooltop, *avail;
-	Reinst *curinst;
-	Rune *rsp, *rep, endr, last;
-	int match, first, gen, pri, matchpri;
+	Rethread *t, *next, *pool, *avail;
+	Reinst *ci;
+	Rune *rsp, *rep, endr, r;
+	int matchgen, gen;
 
 	if(msize > NSUBEXPM)
 		msize = NSUBEXPM;
 
-	if(prog->startinst->gen != 0) {
-		for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
-			curinst->gen = 0;
+	if(p->startinst->gen != 0) {
+		for(ci = p->startinst; ci < p->startinst + p->len; ci++)
+			ci->gen = 0;
 	}
 
+	memset(p->threads, 0, sizeof(Rethread)*p->nthr);
+
 	clist = lists;
 	clist->head = nil;
 	clist->tail = &clist->head;
@@ -34,10 +35,10 @@
 	nlist->head = nil;
 	nlist->tail = &nlist->head;
 
-	pooltop = prog->threads + prog->nthr;
+	pool = p->threads;
 	avail = nil;
 
-	pri = matchpri = gen = match = 0;
+	gen = matchgen = 0;
 	rsp = str;
 	rep = nil;
 	endr = L'\0';
@@ -50,26 +51,40 @@
 			*sem->rep = '\0';
 		}
 	}
-	last = 1;
-	for(; last != L'\0'; rsp++) {
+	for(r = 1; r != L'\0'; rsp++) {
+		r = *rsp;
 		gen++;
-		last = *rsp;
-		first = 1;
+		if(matchgen == 0) {
+			if(avail == nil) {
+				assert(pool < p->threads + p->nthr);
+				t = pool++;
+			} else {
+				t = avail;
+				avail = avail->next;
+			}
+			t->i = p->startinst;
+			if(msize > 0)
+				memset(t->sem, 0, sizeof(Resub)*msize);
+			t->next = nil;
+			t->gen = gen;
+			*clist->tail = t;
+			clist->tail = &t->next;
+		}
 		t = clist->head;
 		if(t == nil)
-			goto Start;
-		curinst = t->pc;
+			break;
+		ci = t->i;
 Again:
-		if(curinst->gen == gen)
+		if(ci->gen == gen || matchgen && t->gen > matchgen)
 			goto Done;
-		curinst->gen = gen;
-		switch(curinst->op) {
+		ci->gen = gen;
+		switch(ci->op) {
 		case ORUNE:
-			if(*rsp != curinst->r)
+			if(r != ci->r)
 				goto Done;
 		case OANY: /* fallthrough */
 			next = t->next;
-			t->pc = curinst + 1;
+			t->i = ci + 1;
 			t->next = nil;
 			*nlist->tail = t;
 			nlist->tail = &t->next;
@@ -76,18 +91,18 @@
 			if(next == nil)
 				break;
 			t = next;
-			curinst = t->pc;
+			ci = t->i;
 			goto Again;
 		case OCLASS:
 		Class:
-			if(*rsp < curinst->r)
+			if(r < ci->r)
 				goto Done;
-			if(*rsp > curinst->r1) {
-				curinst++;
+			if(r > ci->r1) {
+				ci++;
 				goto Class;
 			}
 			next = t->next;
-			t->pc = curinst->a;
+			t->i = ci->a;
 			t->next = nil;
 			*nlist->tail = t;
 			nlist->tail = &t->next;
@@ -94,56 +109,53 @@
 			if(next == nil)
 				break;
 			t = next;
-			curinst = t->pc;
+			ci = t->i;
 			goto Again;
 		case ONOTNL:
-			if(*rsp != L'\n') {
-				curinst++;
+			if(r != L'\n') {
+				ci++;
 				goto Again;
 			}
 			goto Done;
 		case OBOL:
 			if(rsp == str || rsp[-1] == L'\n') {
-				curinst++;
+				ci++;
 				goto Again;
 			}
 			goto Done;
 		case OEOL:
-			if(*rsp == '\n' || *rsp == L'\0' && rep == nil) {
-				curinst++;
+			if(r == L'\n' || r == L'\0' && rep == nil) {
+				ci++;
 				goto Again;
 			}
 			goto Done;
 		case OJMP:
-			curinst = curinst->a;
+			ci = ci->a;
 			goto Again;
 		case OSPLIT:
-			if(avail == nil)
-				next = --pooltop;
-			else {
+			if(avail == nil) {
+				assert(pool < p->threads + p->nthr);
+				next = pool++;
+			} else {
 				next = avail;
 				avail = avail->next;
 			}
-			next->pc = curinst->b;
+			next->i = ci->b;
 			if(msize > 0)
 				memcpy(next->sem, t->sem, sizeof(Resub)*msize);
-			next->pri = t->pri;
 			next->next = t->next;
+			next->gen = t->gen;
 			t->next = next;
-			curinst = curinst->a;
+			ci = ci->a;
 			goto Again;
 		case OSAVE:
-			if(curinst->sub < msize)
-				t->sem[curinst->sub].rsp = rsp;
-			curinst++;
+			if(ci->sub < msize)
+				t->sem[ci->sub].rsp = rsp;
+			ci++;
 			goto Again;
 		case OUNSAVE:
-			if(curinst->sub == 0) {
-				/* "Highest" priority is the left-most longest. */
-				if (t->pri > matchpri)
-					goto Done;
-				match = 1;
-				matchpri = t->pri;
+			if(ci->sub == 0) {
+				matchgen = t->gen;
 				if(sem != nil && msize > 0) {
 					memcpy(sem, t->sem, sizeof(Resub)*msize);
 					sem->rep = rsp;
@@ -150,9 +162,9 @@
 				}
 				goto Done;
 			}
-			if(curinst->sub < msize)
-				t->sem[curinst->sub].rep = rsp;
-			curinst++;
+			if(ci->sub < msize)
+				t->sem[ci->sub].rep = rsp;
+			ci++;
 			goto Again;
 		Done:
 			next = t->next;
@@ -161,30 +173,9 @@
 			if(next == nil)
 				break;
 			t = next;
-			curinst = t->pc;
+			ci = t->i;
 			goto Again;
 		}
-Start:
-		/* Start again once if we haven't found anything. */
-		if(first == 1 && match == 0) {
-			first = 0;
-			if(avail == nil)
-				t = --pooltop;
-			else {
-				t = avail;
-				avail = avail->next;
-			}
-			if(msize > 0)
-				memset(t->sem, 0, sizeof(Resub)*msize);
-			/* "Lower" priority thread */
-			t->pri = matchpri = pri++;
-			t->next = nil;
-			curinst = prog->startinst;
-			goto Again;
-		}
-		/* If we have a match and no extant threads, we are done. */
-		if(match == 1 && nlist->head == nil)
-			break;
 		tmp = clist;
 		clist = nlist;
 		nlist = tmp;
@@ -193,5 +184,5 @@
 	}
 	if(rep != nil)
 		*rep = endr;
-	return match;
+	return matchgen > 0 ? 1 : 0;
 }