ref: 37b86df09ff381bcc4f60802d43e57bd9bfcac73
parent: 52dc943702a8f7815546e76286b153c3813e1db0
author: Ori Bernstein <[email protected]>
date: Tue Mar 17 18:03:25 EDT 2020
Improve the posix preprocessor. This fixes token pasting, making it expand when it should expand, and paste before expansion when it should paste before expanding. Given `#define CAT(a, b) a ## b`, `#define BAR 3` and `#define FOO CAT(BAR, 3)`, `FOO` now produces `33`, while given `#define CAT(a, b) a ## b`, `#define EOF (-1)` and `#define NOP(x) x`, the invocations `NOP(CAT(foo, EOF))`, `CAT(,EOF)` and `CAT(,)` produce `fooEOF`, `(-1)` and <empty> respectively.
--- a/sys/src/cmd/cpp/cpp.c
+++ b/sys/src/cmd/cpp/cpp.c
@@ -68,7 +68,7 @@
trp->tp += 1;
control(trp);
} else if (!skipping && anymacros)
- expandrow(trp, NULL, Notinmacro);
+ expandrow(trp, NULL);
if (skipping)
setempty(trp);
puttokens(trp);
@@ -217,7 +217,7 @@
case KLINE:
trp->tp = tp+1;
- expandrow(trp, "<line>", Notinmacro);
+ expandrow(trp, "<line>");
tp = trp->bp+2;
kline:
if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
--- a/sys/src/cmd/cpp/cpp.h
+++ b/sys/src/cmd/cpp/cpp.h
@@ -14,7 +14,7 @@
EQ, NEQ, LEQ, GEQ, LSH, RSH, LAND, LOR, PPLUS, MMINUS,
ARROW, SBRA, SKET, LP, RP, DOT, AND, STAR, PLUS, MINUS,
TILDE, NOT, SLASH, PCT, LT, GT, CIRC, OR, QUEST,
- COLON, ASGN, COMMA, SHARP, SEMIC, CBRA, CKET,
+ COLON, ASGN, COMMA, XCOMMA, SHARP, SEMIC, CBRA, CKET,
ASPLUS, ASMINUS, ASSTAR, ASSLASH, ASPCT, ASCIRC, ASLSH,
ASRSH, ASOR, ASAND, ELLIPS,
DSHARP1, NAME1, DEFINED, UMINUS };
@@ -107,11 +107,11 @@
void doadefine(Tokenrow *, int);
void doinclude(Tokenrow *);
void doif(Tokenrow *, enum kwtype);
-void expand(Tokenrow *, Nlist *, int);
+void expand(Tokenrow *, Nlist *);
void builtin(Tokenrow *, int);
int gatherargs(Tokenrow *, Tokenrow **, int, int *);
void substargs(Nlist *, Tokenrow *, Tokenrow **);
-void expandrow(Tokenrow *, char *, int);
+void expandrow(Tokenrow *, char *);
void maketokenrow(int, Tokenrow *);
Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
Token *growtokenrow(Tokenrow *);
@@ -120,7 +120,7 @@
void movetokenrow(Tokenrow *, Tokenrow *);
void insertrow(Tokenrow *, int, Tokenrow *);
void peektokens(Tokenrow *, char *);
-void doconcat(Tokenrow *);
+void glue(Tokenrow *, Token *, Token *);
Tokenrow *stringify(Tokenrow *);
int lookuparg(Nlist *, Token *);
long eval(Tokenrow *, int);
--- a/sys/src/cmd/cpp/eval.c
+++ b/sys/src/cmd/cpp/eval.c
@@ -28,66 +28,67 @@
char arity;
char ctype;
} priority[] = {
- { 0, 0, 0 }, /* END */
- { 0, 0, 0 }, /* UNCLASS */
- { 0, 0, 0 }, /* NAME */
- { 0, 0, 0 }, /* NUMBER */
- { 0, 0, 0 }, /* STRING */
- { 0, 0, 0 }, /* CCON */
- { 0, 0, 0 }, /* NL */
- { 0, 0, 0 }, /* WS */
- { 0, 0, 0 }, /* DSHARP */
- { 11, 2, RELAT }, /* EQ */
- { 11, 2, RELAT }, /* NEQ */
- { 12, 2, RELAT }, /* LEQ */
- { 12, 2, RELAT }, /* GEQ */
- { 13, 2, SHIFT }, /* LSH */
- { 13, 2, SHIFT }, /* RSH */
- { 7, 2, LOGIC }, /* LAND */
- { 6, 2, LOGIC }, /* LOR */
- { 0, 0, 0 }, /* PPLUS */
- { 0, 0, 0 }, /* MMINUS */
- { 0, 0, 0 }, /* ARROW */
- { 0, 0, 0 }, /* SBRA */
- { 0, 0, 0 }, /* SKET */
- { 3, 0, 0 }, /* LP */
- { 3, 0, 0 }, /* RP */
- { 0, 0, 0 }, /* DOT */
- { 10, 2, ARITH }, /* AND */
- { 15, 2, ARITH }, /* STAR */
- { 14, 2, ARITH }, /* PLUS */
- { 14, 2, ARITH }, /* MINUS */
- { 16, 1, UNARY }, /* TILDE */
- { 16, 1, UNARY }, /* NOT */
- { 15, 2, ARITH }, /* SLASH */
- { 15, 2, ARITH }, /* PCT */
- { 12, 2, RELAT }, /* LT */
- { 12, 2, RELAT }, /* GT */
- { 9, 2, ARITH }, /* CIRC */
- { 8, 2, ARITH }, /* OR */
- { 5, 2, SPCL }, /* QUEST */
- { 5, 2, SPCL }, /* COLON */
- { 0, 0, 0 }, /* ASGN */
- { 4, 2, 0 }, /* COMMA */
- { 0, 0, 0 }, /* SHARP */
- { 0, 0, 0 }, /* SEMIC */
- { 0, 0, 0 }, /* CBRA */
- { 0, 0, 0 }, /* CKET */
- { 0, 0, 0 }, /* ASPLUS */
- { 0, 0, 0 }, /* ASMINUS */
- { 0, 0, 0 }, /* ASSTAR */
- { 0, 0, 0 }, /* ASSLASH */
- { 0, 0, 0 }, /* ASPCT */
- { 0, 0, 0 }, /* ASCIRC */
- { 0, 0, 0 }, /* ASLSH */
- { 0, 0, 0 }, /* ASRSH */
- { 0, 0, 0 }, /* ASOR */
- { 0, 0, 0 }, /* ASAND */
- { 0, 0, 0 }, /* ELLIPS */
- { 0, 0, 0 }, /* DSHARP1 */
- { 0, 0, 0 }, /* NAME1 */
- { 16, 1, UNARY }, /* DEFINED */
- { 16, 0, UNARY }, /* UMINUS */
+ [END] { 0, 0, 0 },
+ [UNCLASS] { 0, 0, 0 },
+ [NAME] { 0, 0, 0 },
+ [NUMBER] { 0, 0, 0 },
+ [STRING] { 0, 0, 0 },
+ [CCON] { 0, 0, 0 },
+ [NL] { 0, 0, 0 },
+ [WS] { 0, 0, 0 },
+ [DSHARP] { 0, 0, 0 },
+ [EQ] { 11, 2, RELAT },
+ [NEQ] { 11, 2, RELAT },
+ [LEQ] { 12, 2, RELAT },
+ [GEQ] { 12, 2, RELAT },
+ [LSH] { 13, 2, SHIFT },
+ [RSH] { 13, 2, SHIFT },
+ [LAND] { 7, 2, LOGIC },
+ [LOR] { 6, 2, LOGIC },
+ [PPLUS] { 0, 0, 0 },
+ [MMINUS] { 0, 0, 0 },
+ [ARROW] { 0, 0, 0 },
+ [SBRA] { 0, 0, 0 },
+ [SKET] { 0, 0, 0 },
+ [LP] { 3, 0, 0 },
+ [RP] { 3, 0, 0 },
+ [DOT] { 0, 0, 0 },
+ [AND] { 10, 2, ARITH },
+ [STAR] { 15, 2, ARITH },
+ [PLUS] { 14, 2, ARITH },
+ [MINUS] { 14, 2, ARITH },
+ [TILDE] { 16, 1, UNARY },
+ [NOT] { 16, 1, UNARY },
+ [SLASH] { 15, 2, ARITH },
+ [PCT] { 15, 2, ARITH },
+ [LT] { 12, 2, RELAT },
+ [GT] { 12, 2, RELAT },
+ [CIRC] { 9, 2, ARITH },
+ [OR] { 8, 2, ARITH },
+ [QUEST] { 5, 2, SPCL },
+ [COLON] { 5, 2, SPCL },
+ [ASGN] { 0, 0, 0 },
+ [COMMA] { 4, 2, 0 },
+ [XCOMMA] { 4, 2, 0 },
+ [SHARP] { 0, 0, 0 },
+ [SEMIC] { 0, 0, 0 },
+ [CBRA] { 0, 0, 0 },
+ [CKET] { 0, 0, 0 },
+ [ASPLUS] { 0, 0, 0 },
+ [ASMINUS] { 0, 0, 0 },
+ [ASSTAR] { 0, 0, 0 },
+ [ASSLASH] { 0, 0, 0 },
+ [ASPCT] { 0, 0, 0 },
+ [ASCIRC] { 0, 0, 0 },
+ [ASLSH] { 0, 0, 0 },
+ [ASRSH] { 0, 0, 0 },
+ [ASOR] { 0, 0, 0 },
+ [ASAND] { 0, 0, 0 },
+ [ELLIPS] { 0, 0, 0 },
+ [DSHARP1] { 0, 0, 0 },
+ [NAME1] { 0, 0, 0 },
+ [DEFINED] { 16, 1, UNARY },
+ [UMINUS] { 16, 0, UNARY },
};
int evalop(struct pri);
@@ -116,7 +117,7 @@
}
ntok = trp->tp - trp->bp;
kwdefined->val = KDEFINED; /* activate special meaning of defined */
- expandrow(trp, "<if>", Notinmacro);
+ expandrow(trp, "<if>");
kwdefined->val = NAME;
vp = vals;
op = ops;
@@ -165,7 +166,7 @@
case EQ: case NEQ: case LEQ: case GEQ: case LSH: case RSH:
case LAND: case LOR: case SLASH: case PCT:
case LT: case GT: case CIRC: case OR: case QUEST:
- case COLON: case COMMA:
+ case COLON: case COMMA: case XCOMMA:
if (rand==0)
goto syntax;
if (evalop(priority[tp->type])!=0)
--- a/sys/src/cmd/cpp/include.c
+++ b/sys/src/cmd/cpp/include.c
@@ -18,7 +18,7 @@
goto syntax;
if (trp->tp->type!=STRING && trp->tp->type!=LT) {
len = trp->tp - trp->bp;
- expandrow(trp, "<include>", Notinmacro);
+ expandrow(trp, "<include>");
trp->tp = trp->bp+len;
}
if (trp->tp->type==STRING) {
--- a/sys/src/cmd/cpp/macro.c
+++ b/sys/src/cmd/cpp/macro.c
@@ -138,7 +138,7 @@
* Flag is NULL if more input can be gathered.
*/
void
-expandrow(Tokenrow *trp, char *flag, int inmacro)
+expandrow(Tokenrow *trp, char *flag)
{
Token *tp;
Nlist *np;
@@ -170,7 +170,7 @@
if (np->flag&ISMAC)
builtin(trp, np->val);
else {
- expand(trp, np, inmacro);
+ expand(trp, np);
}
tp = trp->tp;
}
@@ -184,7 +184,7 @@
* (ordinarily the beginning of the expansion)
*/
void
-expand(Tokenrow *trp, Nlist *np, int inmacro)
+expand(Tokenrow *trp, Nlist *np)
{
Tokenrow ntr;
int ntokc, narg, i;
@@ -193,12 +193,14 @@
int hs;
copytokenrow(&ntr, np->vp); /* copy macro value */
- if (np->ap==NULL) /* parameterless */
+ if (np->ap==NULL) { /* parameterless */
ntokc = 1;
- else {
+ /* substargs for handling # and ## */
+ atr[0] = nil;
+ substargs(np, &ntr, atr);
+ } else {
ntokc = gatherargs(trp, atr, (np->flag&ISVARMAC) ? rowlen(np->ap) : 0, &narg);
if (narg<0) { /* not actually a call (no '(') */
-/* error(WARNING, "%d %r\n", narg, trp); */
/* gatherargs has already pushed trp->tr to the next token */
return;
}
@@ -214,8 +216,6 @@
dofree(atr[i]);
}
}
- if(!inmacro)
- doconcat(&ntr); /* execute ## operators */
hs = newhideset(trp->tp->hideset, np);
for (tp=ntr.bp; tp<ntr.lp; tp++) { /* distribute hidesets */
if (tp->type==NAME) {
@@ -228,8 +228,7 @@
ntr.tp = ntr.bp;
insertrow(trp, ntokc, &ntr);
trp->tp -= rowlen(&ntr);
- dofree(ntr.bp);
- return;
+ free(ntr.bp);
}
/*
@@ -255,7 +254,6 @@
if (trp->tp >= trp->lp) {
gettokens(trp, 0);
if ((trp->lp-1)->type==END) {
-/* error(WARNING, "reach END\n"); */
trp->lp -= 1;
if (*narg>=0)
trp->tp -= ntok;
@@ -326,7 +324,25 @@
}
return ntok;
}
-
+
+int
+ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok) /* does a ## paste start at rtr->tp? returns 1 and fills ap/an/ntok if so, else 0 */
+{
+ *ap = nil; /* left operand; stays nil when no paste is found */
+ *an = nil; /* right operand; stays nil when the ## is the last token of the row */
+ /* EMPTY ## tok: rtr->tp sits on a ## that is not the first token, so back up one token and use that as the left operand */
+ if (rtr->tp->type == DSHARP && rtr->tp != rtr->bp)
+ rtr->tp--;
+ /* tok ## [tok]: the token right after rtr->tp is a ## */
+ if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) {
+ *ap = rtr->tp;
+ if(rtr->tp + 2 != rtr->lp)
+ *an = rtr->tp + 2;
+ *ntok = 1 + (*ap != nil) + (*an != nil); /* tokens covered: the ## itself, the left operand, and the right operand if present */
+ return 1;
+ }
+ return 0; /* not a paste; *ntok is left untouched */
+}
/*
* substitute the argument list into the replacement string
* This would be simple except for ## and #
@@ -334,12 +350,14 @@
void
substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
{
- Tokenrow tatr;
- Token *tp;
- int ntok, argno;
+ Tokenrow ttr;
+ Token *tp, *ap, *an, *pp, *pn;
+ int ntok, argno, hs;
for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
- if (rtr->tp->type==SHARP) { /* string operator */
+ if(rtr->tp->hideset && checkhideset(rtr->tp->hideset, np)) {
+ rtr->tp++;
+ } else if (rtr->tp->type==SHARP) { /* string operator */
tp = rtr->tp;
rtr->tp += 1;
if ((argno = lookuparg(np, rtr->tp))<0) {
@@ -349,24 +367,52 @@
ntok = 1 + (rtr->tp - tp);
rtr->tp = tp;
insertrow(rtr, ntok, stringify(atr[argno]));
- continue;
- }
- if (rtr->tp->type==NAME
- && (argno = lookuparg(np, rtr->tp)) >= 0) {
- if (rtr->tp < rtr->bp)
- error(ERROR, "access out of bounds");
- if ((rtr->tp+1)->type==DSHARP
- || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
- insertrow(rtr, 1, atr[argno]);
- else {
- copytokenrow(&tatr, atr[argno]);
- expandrow(&tatr, "<macro>", Inmacro);
- insertrow(rtr, 1, &tatr);
- dofree(tatr.bp);
+ } else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
+ pp = ap;
+ pn = an;
+ if (ap && (argno = lookuparg(np, ap)) >= 0){
+ pp = nil;
+ if(atr[argno]->tp != atr[argno]->lp)
+ pp = atr[argno]->lp - 1;
}
- continue;
+ if (an && (argno = lookuparg(np, an)) >= 0) {
+ pn = nil;
+ if(atr[argno]->tp != atr[argno]->lp)
+ pn = atr[argno]->lp - 1;
+ }
+ glue(&ttr, pp, pn);
+ insertrow(rtr, ntok, &ttr);
+ free(ttr.bp);
+ } else if (rtr->tp->type==NAME) {
+ if((argno = lookuparg(np, rtr->tp)) >= 0) {
+ if (rtr->tp < rtr->bp) {
+ error(ERROR, "access out of bounds");
+ continue;
+ }
+ copytokenrow(&ttr, atr[argno]);
+ expandrow(&ttr, "<macro>");
+ insertrow(rtr, 1, &ttr);
+ free(ttr.bp);
+ } else {
+ maketokenrow(1, &ttr);
+ ttr.lp = ttr.tp + 1;
+ *ttr.tp = *rtr->tp;
+
+ hs = newhideset(rtr->tp->hideset, np);
+ if(ttr.tp->hideset == 0)
+ ttr.tp->hideset = hs;
+ else
+ ttr.tp->hideset = unionhideset(ttr.tp->hideset, hs);
+ expandrow(&ttr, (char*)np->name);
+ for(tp = ttr.bp; tp != ttr.lp; tp++)
+ if(tp->type == COMMA)
+ tp->type = XCOMMA;
+ insertrow(rtr, 1, &ttr);
+ dofree(ttr.bp);
+ }
+ } else {
+ rtr->tp++;
}
- rtr->tp++;
}
}
@@ -374,41 +420,35 @@
* Evaluate the ## operators in a tokenrow
*/
void
-doconcat(Tokenrow *trp)
+glue(Tokenrow *ntr, Token *tp, Token *tn) /* paste the spellings of tp and tn (either may be nil) and lex the result into the new row ntr */
{
- Token *ltp, *ntp;
- Tokenrow ntr;
- int len;
+ int np, nn;
+ char *tt, *p, *n;
- for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
- if (trp->tp->type==DSHARP1)
- trp->tp->type = DSHARP;
- else if (trp->tp->type==DSHARP) {
- char tt[128];
- ltp = trp->tp-1;
- ntp = trp->tp+1;
- if (ltp<trp->bp || ntp>=trp->lp) {
- error(ERROR, "## occurs at border of replacement");
- continue;
- }
- len = ltp->len + ntp->len;
- strncpy((char*)tt, (char*)ltp->t, ltp->len);
- strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
- tt[len] = '\0';
- setsource("<##>", -1, tt);
- maketokenrow(3, &ntr);
- gettokens(&ntr, 1);
- unsetsource();
- if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
- error(WARNING, "Bad token %r produced by ##", &ntr);
- ntr.lp = ntr.bp+1;
- trp->tp = ltp;
- makespace(&ntr);
- insertrow(trp, (ntp-ltp)+1, &ntr);
- dofree(ntr.bp);
- trp->tp--;
+ np = tp ? tp->len : 0; /* a nil operand contributes no characters */
+ nn = tn ? tn->len : 0;
+ tt = domalloc(np + nn + 1); /* both spellings plus the terminating NUL */
+ if(tp)
+ memcpy(tt, tp->t, tp->len);
+ if(tn)
+ memcpy(tt+np, tn->t, tn->len);
+ tt[np+nn] = '\0';
+ setsource("<##>", -1, tt); /* presumably makes tt a temporary input source so gettokens re-lexes the pasted text -- confirm against setsource */
+ maketokenrow(3, ntr); /* ntr is initialized here; the caller owns and releases ntr->bp */
+ gettokens(ntr, 1);
+ unsetsource();
+ dofree(tt);
+ if (np + nn == 0) { /* nothing on either side of ##: hand back an empty row */
+ ntr->lp = ntr->bp;
+ } else {
+ if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS) {
+ p = tp ? (char*)tp->t : "<empty>"; /* NOTE(review): token text is copied above by explicit length, so it may not be NUL-terminated for %s -- confirm */
+ n = tn ? (char*)tn->t : "<empty>";
+ error(WARNING, "Bad token %r produced by %s ## %s", ntr, p, n);
}
+ ntr->lp = ntr->bp+1; /* keep only the first token of the re-lexed text */
}
+ makespace(ntr);
}
/*
--- a/sys/src/cmd/cpp/test.c
+++ b/sys/src/cmd/cpp/test.c
@@ -1,4 +1,61 @@
-#define M1()
-#define M2(A1) A1()
-M2(M1)
-M2(P1)
+#define NOP(x) x
+#define CAT(a, b) a ## b
+#define EOF (-1)
+x NOP(CAT(foo, EOF)) y
+x NOP(CAT(EOF, foo)) y
+x CAT(, EOF) y
+y CAT(foo,) x
+x CAT(,foo) y
+X NOP(CAT(,)) y
+
+#define NCAT(a) foo ## a
+NCAT(bar)
+
+#define XCAT(a) ## a
+foo XCAT(bar)
+
+#define CAT3(foo) a##foo##b
+CAT3(blah)
+
+#define BAR 3
+#define FOO CAT(BAR, 3)
+FOO
+
+/*
+ * CURRENTLY BROKEN:
+ * __VA_ARGS__ requires at least one item.
+ * It should accept an empty list.
+#define xprint(a, ...) print(a, __VA_ARGS__)
+xprint("hi", "there")
+xprint("hi")
+*/
+
+#define C a,b
+#define X(a) a
+#define Y X(C)
+Y
+
+#define x 3
+#define f(a) f(x * (a))
+#undef x
+#define x 2
+#define g f
+#define z z[0]
+#define h g(~
+#define m(a) a(w)
+#define w 0,1
+#define t(a) a
+#define p() int
+#define q(x) x
+#define r(x,y) x ## y
+#define str(x) # x
+f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);
+g(x+(3,4)-w) | h 5) & m
+(f)^m(m);
+/*
+ * CURRENTLY BROKEN:
+ * mac() needs at least one argument.
+ * It should treat no args as a single empty arg list.
+p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) };
+char c[2][6] = { str(hello), str() };
+*/
\ No newline at end of file
--- /dev/null
+++ b/sys/src/cmd/cpp/test.expected
@@ -1,0 +1,49 @@
+#line 1 "/usr/ori/src/cpp/test.c"
+
+
+
+x fooEOF y
+x EOFfoo y
+x(-1) y
+y foo x
+x foo y
+X y
+
+
+ foobar
+
+
+foo ## bar
+
+
+ ablahb
+
+
+
+ 33
+
+
+#line 32 "/usr/ori/src/cpp/test.c"
+
+
+
+
+ a,b
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
+ f(2 * (2+(3,4)- 0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^ m(0,1);
+#line 55 "/usr/ori/src/cpp/test.c"