shithub: riscv

Download patch

ref: 37b86df09ff381bcc4f60802d43e57bd9bfcac73
parent: 52dc943702a8f7815546e76286b153c3813e1db0
author: Ori Bernstein <[email protected]>
date: Tue Mar 17 18:03:25 EDT 2020

Improve the posix preprocessor.

This fixes token pasting, making it expand when
it should expand, and paste before expansion when
it should paste before expanding.

	#define CAT(a, b) a ## b
	#define BAR	3
	#define FOO	CAT(BAR, 3)
	FOO

now produces 33, while

	#define CAT(a, b) a ## b
	#define EOF	(-1)
	#define NOP(x)	x
	NOP(CAT(foo, EOF))
	CAT(,EOF)
	CAT(,)

produces

	fooEOF
	(-1)
	<empty>

respectively.

--- a/sys/src/cmd/cpp/cpp.c
+++ b/sys/src/cmd/cpp/cpp.c
@@ -68,7 +68,7 @@
 			trp->tp += 1;
 			control(trp);
 		} else if (!skipping && anymacros)
-			expandrow(trp, NULL, Notinmacro);
+			expandrow(trp, NULL);
 		if (skipping)
 			setempty(trp);
 		puttokens(trp);
@@ -217,7 +217,7 @@
 
 	case KLINE:
 		trp->tp = tp+1;
-		expandrow(trp, "<line>", Notinmacro);
+		expandrow(trp, "<line>");
 		tp = trp->bp+2;
 	kline:
 		if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
--- a/sys/src/cmd/cpp/cpp.h
+++ b/sys/src/cmd/cpp/cpp.h
@@ -14,7 +14,7 @@
 		EQ, NEQ, LEQ, GEQ, LSH, RSH, LAND, LOR, PPLUS, MMINUS,
 		ARROW, SBRA, SKET, LP, RP, DOT, AND, STAR, PLUS, MINUS,
 		TILDE, NOT, SLASH, PCT, LT, GT, CIRC, OR, QUEST,
-		COLON, ASGN, COMMA, SHARP, SEMIC, CBRA, CKET,
+		COLON, ASGN, COMMA, XCOMMA, SHARP, SEMIC, CBRA, CKET,
 		ASPLUS, ASMINUS, ASSTAR, ASSLASH, ASPCT, ASCIRC, ASLSH,
 		ASRSH, ASOR, ASAND, ELLIPS,
 		DSHARP1, NAME1, DEFINED, UMINUS };
@@ -107,11 +107,11 @@
 void	doadefine(Tokenrow *, int);
 void	doinclude(Tokenrow *);
 void	doif(Tokenrow *, enum kwtype);
-void	expand(Tokenrow *, Nlist *, int);
+void	expand(Tokenrow *, Nlist *);
 void	builtin(Tokenrow *, int);
 int	gatherargs(Tokenrow *, Tokenrow **, int, int *);
 void	substargs(Nlist *, Tokenrow *, Tokenrow **);
-void	expandrow(Tokenrow *, char *, int);
+void	expandrow(Tokenrow *, char *);
 void	maketokenrow(int, Tokenrow *);
 Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
 Token	*growtokenrow(Tokenrow *);
@@ -120,7 +120,7 @@
 void	movetokenrow(Tokenrow *, Tokenrow *);
 void	insertrow(Tokenrow *, int, Tokenrow *);
 void	peektokens(Tokenrow *, char *);
-void	doconcat(Tokenrow *);
+void	glue(Tokenrow *, Token *, Token *);
 Tokenrow *stringify(Tokenrow *);
 int	lookuparg(Nlist *, Token *);
 long	eval(Tokenrow *, int);
--- a/sys/src/cmd/cpp/eval.c
+++ b/sys/src/cmd/cpp/eval.c
@@ -28,66 +28,67 @@
 	char	arity;
 	char	ctype;
 } priority[] = {
-	{ 0, 0, 0 },		/* END */
-	{ 0, 0, 0 },		/* UNCLASS */
-	{ 0, 0, 0 },		/* NAME */
-	{ 0, 0, 0 },		/* NUMBER */
-	{ 0, 0, 0 },		/* STRING */
-	{ 0, 0, 0 },		/* CCON */
-	{ 0, 0, 0 },		/* NL */
-	{ 0, 0, 0 },		/* WS */
-	{ 0, 0, 0 },		/* DSHARP */
-	{ 11, 2, RELAT },	/* EQ */
-	{ 11, 2, RELAT },	/* NEQ */
-	{ 12, 2, RELAT },	/* LEQ */
-	{ 12, 2, RELAT },	/* GEQ */
-	{ 13, 2, SHIFT },	/* LSH */
-	{ 13, 2, SHIFT },	/* RSH */
-	{ 7, 2, LOGIC },	/* LAND */
-	{ 6, 2, LOGIC },	/* LOR */
-	{ 0, 0, 0 },		/* PPLUS */
-	{ 0, 0, 0 },		/* MMINUS */
-	{ 0, 0, 0 },		/* ARROW */
-	{ 0, 0, 0 },		/* SBRA */
-	{ 0, 0, 0 },		/* SKET */
-	{ 3, 0, 0 },		/* LP */
-	{ 3, 0, 0 },		/* RP */
-	{ 0, 0, 0 },		/* DOT */
-	{ 10, 2, ARITH },	/* AND */
-	{ 15, 2, ARITH },	/* STAR */
-	{ 14, 2, ARITH },	/* PLUS */
-	{ 14, 2, ARITH },	/* MINUS */
-	{ 16, 1, UNARY },	/* TILDE */
-	{ 16, 1, UNARY },	/* NOT */
-	{ 15, 2, ARITH },	/* SLASH */
-	{ 15, 2, ARITH },	/* PCT */
-	{ 12, 2, RELAT },	/* LT */
-	{ 12, 2, RELAT },	/* GT */
-	{ 9, 2, ARITH },	/* CIRC */
-	{ 8, 2, ARITH },	/* OR */
-	{ 5, 2, SPCL },		/* QUEST */
-	{ 5, 2, SPCL },		/* COLON */
-	{ 0, 0, 0 },		/* ASGN */
-	{ 4, 2, 0 },		/* COMMA */
-	{ 0, 0, 0 },		/* SHARP */
-	{ 0, 0, 0 },		/* SEMIC */
-	{ 0, 0, 0 },		/* CBRA */
-	{ 0, 0, 0 },		/* CKET */
-	{ 0, 0, 0 },		/* ASPLUS */
- 	{ 0, 0, 0 },		/* ASMINUS */
- 	{ 0, 0, 0 },		/* ASSTAR */
- 	{ 0, 0, 0 },		/* ASSLASH */
- 	{ 0, 0, 0 },		/* ASPCT */
- 	{ 0, 0, 0 },		/* ASCIRC */
- 	{ 0, 0, 0 },		/* ASLSH */
-	{ 0, 0, 0 },		/* ASRSH */
- 	{ 0, 0, 0 },		/* ASOR */
- 	{ 0, 0, 0 },		/* ASAND */
-	{ 0, 0, 0 },		/* ELLIPS */
-	{ 0, 0, 0 },		/* DSHARP1 */
-	{ 0, 0, 0 },		/* NAME1 */
-	{ 16, 1, UNARY },	/* DEFINED */
-	{ 16, 0, UNARY },	/* UMINUS */
+	[END]		{ 0, 0, 0 },
+	[UNCLASS]	{ 0, 0, 0 },
+	[NAME]		{ 0, 0, 0 },
+	[NUMBER]	{ 0, 0, 0 },
+	[STRING]	{ 0, 0, 0 },
+	[CCON]		{ 0, 0, 0 },
+	[NL]		{ 0, 0, 0 },
+	[WS]		{ 0, 0, 0 },
+	[DSHARP]	{ 0, 0, 0 },
+	[EQ]		{ 11, 2, RELAT },
+	[NEQ]		{ 11, 2, RELAT },
+	[LEQ]		{ 12, 2, RELAT },
+	[GEQ]		{ 12, 2, RELAT },
+	[LSH]		{ 13, 2, SHIFT },
+	[RSH]		{ 13, 2, SHIFT },
+	[LAND]		{ 7, 2, LOGIC },
+	[LOR]		{ 6, 2, LOGIC },
+	[PPLUS]		{ 0, 0, 0 },
+	[MMINUS]	{ 0, 0, 0 },
+	[ARROW]		{ 0, 0, 0 },
+	[SBRA]		{ 0, 0, 0 },
+	[SKET]		{ 0, 0, 0 },
+	[LP]		{ 3, 0, 0 },
+	[RP]		{ 3, 0, 0 },
+	[DOT]		{ 0, 0, 0 },
+	[AND]		{ 10, 2, ARITH },
+	[STAR]		{ 15, 2, ARITH },
+	[PLUS]		{ 14, 2, ARITH },
+	[MINUS]		{ 14, 2, ARITH },
+	[TILDE]		{ 16, 1, UNARY },
+	[NOT]		{ 16, 1, UNARY },
+	[SLASH]		{ 15, 2, ARITH },
+	[PCT]		{ 15, 2, ARITH },
+	[LT]		{ 12, 2, RELAT },
+	[GT]		{ 12, 2, RELAT },
+	[CIRC]		{ 9, 2, ARITH },
+	[OR]		{ 8, 2, ARITH },
+	[QUEST]		{ 5, 2, SPCL },
+	[COLON]		{ 5, 2, SPCL },
+	[ASGN]		{ 0, 0, 0 },
+	[COMMA]		{ 4, 2, 0 },
+	[XCOMMA]	{ 4, 2, 0 },
+	[SHARP]		{ 0, 0, 0 },
+	[SEMIC]		{ 0, 0, 0 },
+	[CBRA]		{ 0, 0, 0 },
+	[CKET]		{ 0, 0, 0 },
+	[ASPLUS]	{ 0, 0, 0 },
+ 	[ASMINUS]	{ 0, 0, 0 },
+ 	[ASSTAR]	{ 0, 0, 0 },
+ 	[ASSLASH]	{ 0, 0, 0 },
+ 	[ASPCT]		{ 0, 0, 0 },
+ 	[ASCIRC]	{ 0, 0, 0 },
+ 	[ASLSH]		{ 0, 0, 0 },
+	[ASRSH]		{ 0, 0, 0 },
+ 	[ASOR]		{ 0, 0, 0 },
+ 	[ASAND]		{ 0, 0, 0 },
+	[ELLIPS]	{ 0, 0, 0 },
+	[DSHARP1]	{ 0, 0, 0 },
+	[NAME1]		{ 0, 0, 0 },
+	[DEFINED]	{ 16, 1, UNARY },
+	[UMINUS]	{ 16, 0, UNARY },
 };
 
 int	evalop(struct pri);
@@ -116,7 +117,7 @@
 	}
 	ntok = trp->tp - trp->bp;
 	kwdefined->val = KDEFINED;	/* activate special meaning of defined */
-	expandrow(trp, "<if>", Notinmacro);
+	expandrow(trp, "<if>");
 	kwdefined->val = NAME;
 	vp = vals;
 	op = ops;
@@ -165,7 +166,7 @@
 		case EQ: case NEQ: case LEQ: case GEQ: case LSH: case RSH:
 		case LAND: case LOR: case SLASH: case PCT:
 		case LT: case GT: case CIRC: case OR: case QUEST:
-		case COLON: case COMMA:
+		case COLON: case COMMA: case XCOMMA:
 			if (rand==0)
 				goto syntax;
 			if (evalop(priority[tp->type])!=0)
--- a/sys/src/cmd/cpp/include.c
+++ b/sys/src/cmd/cpp/include.c
@@ -18,7 +18,7 @@
 		goto syntax;
 	if (trp->tp->type!=STRING && trp->tp->type!=LT) {
 		len = trp->tp - trp->bp;
-		expandrow(trp, "<include>", Notinmacro);
+		expandrow(trp, "<include>");
 		trp->tp = trp->bp+len;
 	}
 	if (trp->tp->type==STRING) {
--- a/sys/src/cmd/cpp/macro.c
+++ b/sys/src/cmd/cpp/macro.c
@@ -138,7 +138,7 @@
  * Flag is NULL if more input can be gathered.
  */
 void
-expandrow(Tokenrow *trp, char *flag, int inmacro)
+expandrow(Tokenrow *trp, char *flag)
 {
 	Token *tp;
 	Nlist *np;
@@ -170,7 +170,7 @@
 		if (np->flag&ISMAC)
 			builtin(trp, np->val);
 		else {
-			expand(trp, np, inmacro);
+			expand(trp, np);
 		}
 		tp = trp->tp;
 	}
@@ -184,7 +184,7 @@
  * (ordinarily the beginning of the expansion)
  */
 void
-expand(Tokenrow *trp, Nlist *np, int inmacro)
+expand(Tokenrow *trp, Nlist *np)
 {
 	Tokenrow ntr;
 	int ntokc, narg, i;
@@ -193,12 +193,14 @@
 	int hs;
 
 	copytokenrow(&ntr, np->vp);		/* copy macro value */
-	if (np->ap==NULL)			/* parameterless */
+	if (np->ap==NULL) {			/* parameterless */
 		ntokc = 1;
-	else {
+		/* substargs for handling # and ## */
+		atr[0] = nil;
+		substargs(np, &ntr, atr);
+	} else {
 		ntokc = gatherargs(trp, atr, (np->flag&ISVARMAC) ? rowlen(np->ap) : 0, &narg);
 		if (narg<0) {			/* not actually a call (no '(') */
-/* error(WARNING, "%d %r\n", narg, trp); */
 			/* gatherargs has already pushed trp->tr to the next token */
 			return;
 		}
@@ -214,8 +216,6 @@
 			dofree(atr[i]);
 		}
 	}
-	if(!inmacro)
-		doconcat(&ntr);				/* execute ## operators */
 	hs = newhideset(trp->tp->hideset, np);
 	for (tp=ntr.bp; tp<ntr.lp; tp++) {	/* distribute hidesets */
 		if (tp->type==NAME) {
@@ -228,8 +228,7 @@
 	ntr.tp = ntr.bp;
 	insertrow(trp, ntokc, &ntr);
 	trp->tp -= rowlen(&ntr);
-	dofree(ntr.bp);
-	return;
+	free(ntr.bp);
 }	
 
 /*
@@ -255,7 +254,6 @@
 		if (trp->tp >= trp->lp) {
 			gettokens(trp, 0);
 			if ((trp->lp-1)->type==END) {
-/* error(WARNING, "reach END\n"); */
 				trp->lp -= 1;
 				if (*narg>=0)
 					trp->tp -= ntok;
@@ -326,7 +324,25 @@
 	}
 	return ntok;
 }
-
+	
+int	/* 1 if the cursor rtr->tp sits at a ## paste, 0 otherwise */
+ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok)	/* ap, an: left/right operand out-params; ntok: tokens spanned */
+{
+	*ap = nil;	/* both out-params stay nil unless a paste is found */
+	*an = nil;
+	/* EMPTY ## tok: the cursor is on a ## that is not the first token, so back up to the token before it */
+	if (rtr->tp->type == DSHARP && rtr->tp != rtr->bp)
+		rtr->tp--;
+	/* tok ## tok: the token after the cursor is ##, so the cursor is the left operand */
+	if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) {
+		*ap = rtr->tp;
+		if(rtr->tp + 2 != rtr->lp)	/* a right operand exists only if ## is not the last token */
+			*an = rtr->tp + 2;
+		*ntok = 1 + (*ap != nil) + (*an != nil);	/* the ## itself plus each operand present */
+		return 1;
+	}
+	return 0;	/* *ntok is left untouched when no paste is found */
+}
 /*
  * substitute the argument list into the replacement string
  *  This would be simple except for ## and #
@@ -334,12 +350,14 @@
 void
 substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
 {
-	Tokenrow tatr;
-	Token *tp;
-	int ntok, argno;
+	Tokenrow ttr;
+	Token *tp, *ap, *an, *pp, *pn;
+	int ntok, argno, hs;
 
 	for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
-		if (rtr->tp->type==SHARP) {	/* string operator */
+		if(rtr->tp->hideset && checkhideset(rtr->tp->hideset, np)) {
+			rtr->tp++;
+		} else if (rtr->tp->type==SHARP) {	/* string operator */
 			tp = rtr->tp;
 			rtr->tp += 1;
 			if ((argno = lookuparg(np, rtr->tp))<0) {
@@ -349,24 +367,52 @@
 			ntok = 1 + (rtr->tp - tp);
 			rtr->tp = tp;
 			insertrow(rtr, ntok, stringify(atr[argno]));
-			continue;
-		}
-		if (rtr->tp->type==NAME
-		 && (argno = lookuparg(np, rtr->tp)) >= 0) {
-			if (rtr->tp < rtr->bp)
-				error(ERROR, "access out of bounds");
-			if ((rtr->tp+1)->type==DSHARP
-			 || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
-				insertrow(rtr, 1, atr[argno]);
-			else {
-				copytokenrow(&tatr, atr[argno]);
-				expandrow(&tatr, "<macro>", Inmacro);
-				insertrow(rtr, 1, &tatr);
-				dofree(tatr.bp);
+		} else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
+			pp = ap;
+			pn = an;
+			if (ap && (argno = lookuparg(np, ap)) >= 0){
+				pp = nil;
+				if(atr[argno]->tp != atr[argno]->lp)
+					pp = atr[argno]->lp - 1;
 			}
-			continue;
+			if (an && (argno = lookuparg(np, an)) >= 0) {
+				pn = nil;
+				if(atr[argno]->tp != atr[argno]->lp)
+					pn = atr[argno]->lp - 1;
+			}
+			glue(&ttr, pp, pn);
+			insertrow(rtr, ntok, &ttr);
+			free(ttr.bp);
+		} else if (rtr->tp->type==NAME) {
+			if((argno = lookuparg(np, rtr->tp)) >= 0) {
+				if (rtr->tp < rtr->bp) {
+					error(ERROR, "access out of bounds");
+					continue;
+				}
+				copytokenrow(&ttr, atr[argno]);
+				expandrow(&ttr, "<macro>");
+				insertrow(rtr, 1, &ttr);
+				free(ttr.bp);
+			} else {
+				maketokenrow(1, &ttr);
+				ttr.lp = ttr.tp + 1;
+				*ttr.tp = *rtr->tp;
+
+				hs = newhideset(rtr->tp->hideset, np);
+				if(ttr.tp->hideset == 0)
+					ttr.tp->hideset = hs;
+				else
+					ttr.tp->hideset = unionhideset(ttr.tp->hideset, hs);
+				expandrow(&ttr, (char*)np->name);
+				for(tp = ttr.bp; tp != ttr.lp; tp++)
+					if(tp->type == COMMA)
+						tp->type = XCOMMA;
+				insertrow(rtr, 1, &ttr);
+				dofree(ttr.bp);
+			}
+		} else {
+			rtr->tp++;
 		}
-		rtr->tp++;
 	}
 }
 
@@ -374,41 +420,35 @@
  * Evaluate the ## operators in a tokenrow
  */
 void
-doconcat(Tokenrow *trp)
+glue(Tokenrow *ntr, Token *tp, Token *tn)	/* paste tp ## tn (either may be nil) into the fresh row *ntr */
 {
-	Token *ltp, *ntp;
-	Tokenrow ntr;
-	int len;
+	int np, nn;	/* byte lengths of the left and right operand text */
+	char *tt, *p, *n;
 
-	for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
-		if (trp->tp->type==DSHARP1)
-			trp->tp->type = DSHARP;
-		else if (trp->tp->type==DSHARP) {
-			char tt[128];
-			ltp = trp->tp-1;
-			ntp = trp->tp+1;
-			if (ltp<trp->bp || ntp>=trp->lp) {
-				error(ERROR, "## occurs at border of replacement");
-				continue;
-			}
-			len = ltp->len + ntp->len;
-			strncpy((char*)tt, (char*)ltp->t, ltp->len);
-			strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
-			tt[len] = '\0';
-			setsource("<##>", -1, tt);
-			maketokenrow(3, &ntr);
-			gettokens(&ntr, 1);
-			unsetsource();
-			if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
-				error(WARNING, "Bad token %r produced by ##", &ntr);
-			ntr.lp = ntr.bp+1;
-			trp->tp = ltp;
-			makespace(&ntr);
-			insertrow(trp, (ntp-ltp)+1, &ntr);
-			dofree(ntr.bp);
-			trp->tp--;
+	np = tp ? tp->len : 0;	/* a nil operand contributes no text */
+	nn = tn ? tn->len : 0;
+	tt = domalloc(np + nn + 1);	/* scratch buffer: left text, right text, NUL */
+	if(tp)
+		memcpy(tt, tp->t, tp->len);
+	if(tn)
+		memcpy(tt+np, tn->t, tn->len);
+	tt[np+nn] = '\0';
+	setsource("<##>", -1, tt);	/* re-lex the joined text as a source of its own */
+	maketokenrow(3, ntr);
+	gettokens(ntr, 1);
+	unsetsource();
+	dofree(tt);
+	if (np + nn == 0) {	/* both operands empty: the result is an empty row */
+		ntr->lp = ntr->bp;
+	} else {
+		if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS) {	/* not exactly two lexed tokens, or an unclassified first one */
+			p = tp ? (char*)tp->t : "<empty>";	/* NOTE(review): token text may not be NUL-terminated for %s - confirm */
+			n = tn ? (char*)tn->t : "<empty>";
+			error(WARNING, "Bad token %r produced by %s ## %s", ntr, p, n);	/* ntr is already a Tokenrow pointer; &ntr handed %r a Tokenrow** */
 		}
+		ntr->lp = ntr->bp+1;	/* keep only the first lexed token */
 	}
+	makespace(ntr);
 }
 
 /*
--- a/sys/src/cmd/cpp/test.c
+++ b/sys/src/cmd/cpp/test.c
@@ -1,4 +1,61 @@
-#define M1()
-#define M2(A1) A1()
-M2(M1)
-M2(P1)
+#define NOP(x) x
+#define CAT(a, b) a ## b
+#define EOF	(-1)
+x NOP(CAT(foo, EOF)) y
+x NOP(CAT(EOF, foo)) y
+x CAT(, EOF) y
+y CAT(foo,) x
+x CAT(,foo) y
+X NOP(CAT(,)) y
+
+#define NCAT(a)	foo ## a
+NCAT(bar)
+
+#define XCAT(a)	## a
+foo XCAT(bar)
+
+#define CAT3(foo)	a##foo##b
+CAT3(blah)
+
+#define BAR	3
+#define FOO	CAT(BAR, 3)
+FOO
+
+/*
+ * CURRENTLY BROKEN:
+ *     __VA_ARGS__ requires at least one item.
+ *     It should accept an empty list.
+#define xprint(a, ...)	print(a, __VA_ARGS__)
+xprint("hi", "there")
+xprint("hi")
+*/
+
+#define C	a,b
+#define X(a)	a
+#define Y	X(C)
+Y
+
+#define    x          3
+#define    f(a)       f(x * (a))
+#undef     x
+#define    x          2
+#define    g          f
+#define    z          z[0]
+#define    h          g(~
+#define    m(a)       a(w)
+#define    w          0,1
+#define    t(a)       a
+#define    p()        int
+#define    q(x)       x
+#define    r(x,y)     x ## y
+#define    str(x)     # x
+f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);
+g(x+(3,4)-w) | h 5) & m
+(f)^m(m);
+/*
+ * CURRENTLY BROKEN:
+ *     mac() needs at least one argument.
+ *     It should treat no args as a single empty arg list.
+p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) };
+char c[2][6] = { str(hello), str() };
+*/
\ No newline at end of file
--- /dev/null
+++ b/sys/src/cmd/cpp/test.expected
@@ -1,0 +1,49 @@
+#line 1 "/usr/ori/src/cpp/test.c"
+
+
+
+x fooEOF y
+x EOFfoo y
+x(-1) y
+y foo x
+x foo y
+X y
+
+
+ foobar
+
+
+foo ## bar
+
+
+ ablahb
+
+
+
+ 33
+
+
+#line 32 "/usr/ori/src/cpp/test.c"
+
+
+
+
+ a,b
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
+ f(2 * (2+(3,4)- 0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^ m(0,1);
+#line 55 "/usr/ori/src/cpp/test.c"