shithub: scc

Download patch

ref: e1b218ec5e6cc2745a43c5244bf6c2c0481b3e07
parent: dbacd2db0387ac4dbd726aa48b7a5bf765d69d4e
author: Roberto E. Vargas Caballero <[email protected]>
date: Fri Jul 17 15:30:37 EDT 2015

Rewrite and simplify the embedded preprocessor

The preprocessor dealt with macro expansions in the same way as
includes. This was done as a first attempt to avoid recursion,
but it only avoided direct recursion. Implementing the algorithm
described in ANSI C is really complex, and the idea of the
embedded preprocessor was to have a fast and simple preprocessor,
so it is better to drop any attempt to detect recursion. This
will simplify the input/output functions a lot.
This version of the preprocessor uses the lexer of the compiler,
so the code is not as hardcore as the original, which did all
the parsing directly with char pointers.

--- a/cc1/cc1.h
+++ b/cc1/cc1.h
@@ -11,7 +11,9 @@
 typedef struct symbol Symbol;
 typedef struct caselist Caselist;
 typedef struct node Node;
+typedef struct input Input;
 
+
 struct type {
 	unsigned char op;           /* type builder operator */
 	unsigned char ns;
@@ -70,6 +72,14 @@
 	unsigned char token;
 };
 
+struct input {
+	char *fname;
+	void *fp;
+	char *line, *begin, *p;
+	struct input *next;
+	unsigned short nline;
+};
+
 /*
  * Definition of enumerations
  */
@@ -96,6 +106,7 @@
 	NS_LABEL,
 	NS_CPP,
 	NS_KEYWORD,
+	NS_CPPCLAUSES,
 	NS_STRUCTS
 };
 
@@ -111,6 +122,13 @@
 	ISEXTERN   =128
 };
 
+
+/* lexer mode, compiler or preprocessor directive */
+enum {
+	CCMODE,
+	CPPMODE
+};
+
 /* input tokens */
 enum tokens {
 	TQUALIFIER = 128,
@@ -177,6 +195,14 @@
 	CONTINUE,
 	BREAK,
 	RETURN,
+	DEFINE,
+	INCLUDE,
+	LINE,
+	PRAGMA,
+	ERROR,
+	IFDEF,
+	IFNDEF,
+	UNDEF,
 	EOFTOK
 };
 
@@ -275,11 +301,7 @@
 extern bool moreinput(void);
 extern void expect(unsigned tok);
 extern void discard(void);
-extern char *getfname(void);
-extern unsigned short getfline(void);
-extern void setfname(char *name);
-extern void setfline(unsigned short line);
-extern bool addinput(char *fname, Symbol *sym, char *str);
+extern bool addinput(char *fname);
 extern void setnamespace(int ns);
 extern void setsafe(int type);
 extern void ilex(char *fname);
@@ -298,8 +320,8 @@
 
 /* cpp.c */
 extern void icpp(void);
-extern bool cpp(char *s);
-extern int expand(Symbol *sym);
+extern bool cpp(void);
+extern bool expand(char *begin, Symbol *sym);
 
 /*
  * Definition of global variables
@@ -310,6 +332,8 @@
 extern unsigned short yylen;
 extern int cppoff, disexpand;
 extern unsigned cppctx;
+extern Input *input;
+extern int lexmode;
 
 extern Type *voidtype, *pvoidtype, *booltype,	
             *uchartype,   *chartype,
--- a/cc1/cpp.c
+++ b/cc1/cpp.c
@@ -19,7 +19,7 @@
 static unsigned arglen;
 static Symbol *symline, *symfile;
 static unsigned char ifstatus[NR_COND];
-static int paramerr;
+static Type *charptype;
 
 unsigned cppctx;
 int disexpand;
@@ -56,87 +56,26 @@
 	symfile = defmacro("__FILE__");
 }
 
-static bool
-iden(char **str)
-{
-	char c, *bp, *s = *str;
-
-	if (!isalpha(c = *s) && c != '_')
-		return 0;
-	for (bp = yytext; bp < &yytext[IDENTSIZ]; *bp++ = c) {
-		if ((c = *s) == '\0' || !isalnum(c) && c != '_')
-			break;
-		++s;
-	}
-	if (bp == &yytext[IDENTSIZ]) {
-		printerr("identifier too long in preprocessor");
-		return 0;
-	}
-	*bp = '\0';
-
-	while (isspace(*s))
-		++s;
-
-	*str = s;
-	return 1;
-}
-
-static bool
-string(char **input, char **str, char delim)
-{
-	char c, *s = *input;
-
-	if (str)
-		*str = s;
-
-	while ((c = *s) && c != delim)
-		++s;
-	if (c == '\0')
-		return 0;
-	*s++ = '\0';
-	*input = s;
-
-	return 1;
-}
-
 static void
-cleanup(char *s)
-{
-	while (isspace(*s))
-		++s;
-	if (*s != '\0')
-		printerr("trailing characters after preprocessor directive");
-}
-
-static void
 nextcpp(void)
 {
-	next();
-	if (yytoken == EOFTOK) {
-		printerr("unterminated argument list invoking macro \"%s\"",
-		      macroname);
-		goto mark_error;
-	}
-	if (yylen + 1 > arglen) {
-		printerr("argument overflow invoking macro \"%s\"",
-		      macroname);
-		goto mark_error;
-	}
-	memcpy(argp, yytext, yylen);
-	argp += yylen;
-	*argp++ = ' ';
-	arglen -= yylen + 1;
-	return;
-
-mark_error:
-	paramerr = 1;
-	yytoken = 0;
+        next();
+        if (yytoken == EOFTOK)
+		error("unterminated argument list invoking macro \"%s\"",
+                      macroname);
+        if (yylen + 1 > arglen)
+                error("argument overflow invoking macro \"%s\"",
+                      macroname);
+        memcpy(argp, yytext, yylen);
+        argp += yylen;
+        *argp++ = ' ';
+        arglen -= yylen + 1;
 }
 
 static void
 paren(void)
 {
-	while (!paramerr) {
+	for (;;) {
 		nextcpp();
 		switch (yytoken) {
 		case ')':
@@ -151,7 +90,7 @@
 static void
 parameter(void)
 {
-	while (!paramerr) {
+	for (;;) {
 		nextcpp();
 		switch (yytoken) {
 		case ')':
@@ -172,90 +111,55 @@
 	int n;
 
 	if (nargs == -1)
-		return 1;
-
-	if (ahead() != '(')
+		return -1;
+	if (ahead() != '(' && nargs > 0)
 		return 0;
 
 	disexpand = 1;
 	next();
-	paramerr = n = 0;
+	n = 0;
 	argp = buffer;
 	arglen = INPUTSIZ;
-	if (ahead() != ')') {
+	if (yytoken != ')') {
 		do {
 			*listp++ = argp;
 			parameter();
-		} while (!paramerr && ++n < NR_MACROARG && yytoken == ',');
+		} while (++n < NR_MACROARG && yytoken == ',');
 	}
+	if (yytoken != ')')
+		error("incorrect macro function alike invocation");
 	disexpand = 0;
 
-	if (paramerr)
-		return -1;
-	if (n == NR_MACROARG) {
-		printerr("too much parameters in macro \"%s\"", macroname);
-		return -1;
-	}
+	if (n == NR_MACROARG)
+		error("too much parameters in macro \"%s\"", macroname);
 	if (n != nargs) {
-		printerr("macro \"%s\" passed %d arguments, but it takes %d",
+		error("macro \"%s\" passed %d arguments, but it takes %d",
 		      macroname, n, nargs);
-		return -1;
 	}
 
 	return 1;
 }
 
-/*
- * sym->u.s is a string with the following format:
- * 	dd#string
- * where dd is the number of arguments of the macro
- * (-1 if it is a macro without arguments), and string
- * is the macro definition, where @dd@ indicates the
- * parameter number dd
- */
-#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? INPUTSIZ : FILENAME_MAX+2)
-int
-expand(Symbol *sym)
+static void
+copymacro(char *bp, char *s, size_t bufsiz, char *arglist[])
 {
-	unsigned len;
-	int r, n;
-	char *arglist[NR_MACROARG], arguments[INPUTSIZ], buffer[BUFSIZE];
-	char prevc, c, *bp, *lim, *arg, *s = sym->u.s;
+	char prevc, c, *arg;
 
-	fprintf(stderr, "macro %s:%s\n", sym->name, sym->u.s);
-	if (sym == symfile) {
-		sprintf(buffer, "\"%s\"", getfname());
-		goto add_macro;
-	}
-	if (sym == symline) {
-		sprintf(buffer, "%d", getfline());
-		goto add_macro;
-	}
-
-	macroname = sym->name;
-	if ((r = parsepars(arguments, arglist, atoi(s))) < 1)
-		return r;
-
-	for (n = 0; n < atoi(s); ++n)
-		fprintf(stderr, "PAR%d:%s\n", n, arglist[n]);
-
-	len = INPUTSIZ-1;
-	bp = buffer;
-	for (prevc = '\0', s += 3; c = *s; prevc = c, ++s) {
+	for (prevc = '\0'; c = *s; prevc = c, ++s) {
 		if (c != '@') {
 			if (c == '#')
 				continue;
-			if (len-- == 0)
+			if (bufsiz-- == 0)
 				goto expansion_too_long;
 			*bp++ = c;
 		} else {
-			unsigned size;
+			size_t size;
 
 			if (prevc == '#')
-				len -= 2;
+				bufsiz -= 2;
 			arg = arglist[atoi(++s)];
 			size = strlen(arg);
-			if (size > len)
+			if (size > bufsiz)
 				goto expansion_too_long;
 			if (prevc == '#')
 				*bp++ = '"';
@@ -263,190 +167,179 @@
 			bp += size;
 			if (prevc == '#')
 				*bp++ = '"';
-			len -= size;
+			bufsiz -= size;
 			s += 2;
 		}
 	}
-	*bp = '\0';
-	fprintf(stderr, "macro expanded:%s\n", buffer);
-add_macro:
-	addinput(NULL, sym, buffer);
-	return 1;
+        *bp = '\0';
 
+	return;
+
 expansion_too_long:
-	printerr("expansion of macro \"%s\" is too long", macroname);
-	return -1;
+	error("expansion of macro \"%s\" is too long", macroname);
 }
-#undef BUFSIZE
 
-/*
- * Parse an argument list (par0, par1, ...) and creates
- * an array with pointers to all the arguments in the
- * list
- */
-static char *
-parseargs(char *s, char *args[NR_MACROARG], int *nargs)
+#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? INPUTSIZ : FILENAME_MAX+2)
+bool
+expand(char *begin, Symbol *sym)
 {
-	int n;
 	size_t len;
-	char *endp, c;
+	int n;
+	char *s = sym->u.s;
+	char *arglist[NR_MACROARG], arguments[INPUTSIZ], buffer[BUFSIZE];
 
-	n = -1;
-	if (*s != '(')
-		goto set_nargs;
-	n = 0;
-	while (isspace(*s++))
-		/* nothing */;
-	if (*s == ')')
-		goto set_nargs;
-
-	for (n = 1; n <= NR_MACROARG; ++n) {
-		while (isspace(*s))
-			++s;
-		if (!isalpha(*s) && *s != '_') {
-			printerr("macro arguments must be identifiers");
-			return NULL;
-		}
-		for (endp = s+1; isalnum(*endp) || *endp == '_'; ++endp)
-			/* nothing */;
-		if ((len = endp - s) > IDENTSIZ) {
-			printerr("macro argument too long");
-			return NULL;
-		}
-		*args++ = s;
-		for (s = endp; isspace(*s); ++s)
-			*s = '\0';
-		c = *s;
-		*s++ = '\0';
-		if (c == ')')
-			break;
-		if (c == ',') {
-			continue;
-		} else {
-			printerr("macro parameters must be comma-separated");
-			return NULL;
-		}
+	fprintf(stderr, "macro '%s':%s\n", sym->name, sym->u.s);
+	if (sym == symfile) {
+		sprintf(buffer, "\"%s\"", input->fname);
+		goto print_subs;
 	}
-	if (n > NR_MACROARG) {
-		printerr("too much parameters in macro");
-		return NULL;
+	if (sym == symline) {
+		sprintf(buffer, "%d", input->line);
+		goto print_subs;
 	}
 
-set_nargs:
-	*nargs = n;
-	return s;
+	macroname = sym->name;
+	if (!parsepars(arguments, arglist, atoi(s)))
+		return 0;
+        for (n = 0; n < atoi(s); ++n)
+                fprintf(stderr, "PAR%d:%s\n", n, arglist[n]);
+
+	copymacro(buffer, s+3, INPUTSIZ-1, arglist);
+
+print_subs:
+	fprintf(stderr, "macro '%s' expanded to :'%s'\n", macroname, buffer);
+	len = strlen(buffer);
+
+	/* cut macro invocation */
+	memmove(begin, input->p, input->p - begin);
+	memmove(begin + len, begin, len);
+
+	/* paste macro expansion */
+	memcpy(begin, buffer, len);
+	input->p = input->begin = begin;
+
+	return 1;
 }
+#undef BUFSIZE
 
-/*
- * Copy a string define, and substitute formal arguments of the
- * macro into strings in the form @XX@, where XX is the position
- * of the argument in the argument list.
- */
-static bool
-copydefine(char *s, char *args[], char *buff, int bufsiz, int nargs)
+static int
+getpars(Symbol *args[NR_MACROARG])
 {
-	int n;
-	size_t ncopy;
-	char arroba[6], *p, **bp, c, prevc;
+	int n = -1;
+	char *err;
 
-	for (prevc = '\0'; c = *s++; prevc = c) {
-		if (!isalpha(c) && c != '_' || nargs < 1) {
-			if (bufsiz-- == 0)
-				goto too_long;
-			if (prevc == '#')
-				goto bad_stringer;
-			*buff++ = c;
-			if (c != '#')
-				continue;
-			while (isspace(*++s))
-				/* nothing */;
+	if (!accept('('))
+		return n;
+	++n;
+	if (accept(')'))
+		return n;
+
+	do {
+		if (n == NR_MACROARG) {
+			err = "too much parameters in macro";
+			goto popctx_and_error;
 		}
-		/* found an identifier, is it one of the macro arguments? */
-		for (p = s; isalnum(c = *p) || c == '_'; ++p)
-			/* nothing */;
-		ncopy = p - --s;
-		bp = args;
-		for (n = 0; n < nargs; ++n) {
-			if (strncmp(s, *bp++, ncopy))
-				continue;
-			sprintf(arroba, "@%02d@", n);
-			s = arroba, ncopy = 4;
-			break;
+		if (yytoken != IDEN) {
+			err = "macro arguments must be identifiers";
+			goto popctx_and_error;
 		}
-		if (n == nargs && prevc == '#')
-			goto bad_stringer;
-		if ((bufsiz -= ncopy) < 0)
-			goto too_long;
-		memcpy(buff, s, ncopy);
-		buff += ncopy, s = p;
-	}
-	if (bufsiz == 0)
-		goto too_long;
-	*buff = '\0';
-	return 1;
+		args[n++] = yylval.sym;
+		next();
+	} while (accept(','));
+	expect(')');
 
-bad_stringer:
-	printerr("'#' is not followed by a macro parameter");
-	return 0;
-too_long:
-	printerr("macro definition too long");
-	return 0;
+	return n;
+
+popctx_and_error:
+	popctx();
+	error(err);
 }
 
-static char *
-mkdefine(char *s)
+static void
+getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz)
 {
-	int nargs;
-	char *args[NR_MACROARG], buff[LINESIZ+1];
+	Symbol **argp;
+	char *err;
+	size_t len;
+	int prevc = 0, ispar;
 
-	if ((s = parseargs(s, args, &nargs)) == NULL)
-		return NULL;
-	sprintf(buff, "%02d#", nargs);
+	for (;;) {
+		ispar = 0;
+		if (yytoken == IDEN) {
+			for (argp = args; argp < &args[nargs]; ++argp) {
+				if (*argp == yylval.sym)
+					break;
+			}
+			if (argp != &args[nargs]) {
+				sprintf(yytext, "@%02d@", argp - args);
+				ispar = 1;
+			}
+		}
+		if (prevc == '#' && !ispar)
+			goto bad_stringer;
+		if (yytoken == EOFTOK)
+			break;
 
-	while (isspace(*s))
-		++s;
+		if ((len = strlen(yytext)) >= bufsiz) {
+			err = "too long macro";
+			goto popctx_and_error;
+		}
+		memcpy(bp, yytext, len);
+		bp += len;
+		bufsiz -= len;
+		if ((prevc = yytoken) != '#') {
+			bufsiz;
+			*bp++ = ' ';
+		}
+		next();
+	}
+	*bp = '\0';
+	return;
 
-	if (*s == '\0')
-		buff[0] = '\0';
-	else if (!copydefine(s, args, buff+3, LINESIZ-3, nargs))
-		return NULL;
-	return xstrdup(buff);
+bad_stringer:
+	err = "'#' is not followed by a macro parameter";
+popctx_and_error:
+	popctx();
+	error(err);
 }
 
 static void
-define(char *s)
+define(void)
 {
-	char *t;
-	Symbol *sym;
+	Symbol *sym,*args[NR_MACROARG];
+	char buff[LINESIZ+1];
+	int n;
 
 	if (cppoff)
 		return;
-	if (!iden(&s)) {
-		printerr("#define must have an identifier as parameter");
-		return;
-	}
-
-	for (t = s + strlen(s) + 1; isspace(*--t); *t = '\0')
-		/* nothing */;
-	if ((s = mkdefine(s)) == NULL)
-		return;
-
-	sym = lookup(NS_CPP);
+	if (yytoken != IDEN)
+		error("macro names must be identifiers");
+	sym = yylval.sym;
 	if ((sym->flags & ISDEFINED) && sym->ns == NS_CPP) {
 		warn("'%s' redefined", yytext);
 		free(sym->u.s);
+	} else if (sym->ns != NS_CPP) {
+		sym = lookup(NS_CPP);
 	}
 	sym->flags |= ISDEFINED;
-	sym->ns = NS_CPP;
-	sym->ctx = UCHAR_MAX;
-	sym->u.s = s;
+
+	pushctx();
+
+	next();
+	n = getpars(args);
+	sprintf(buff, "%02d#", n);
+	getdefs(args, n, buff+3, LINESIZ-3);
+	sym->u.s = xstrdup(buff);
+	fprintf(stderr, "Defining macro '%s'='%s'\n", sym->name, buff);
+
+	popctx();
 }
 
 static void
-include(char *s)
+include(void)
 {
-	char **bp, delim, c, *p, *file, path[FILENAME_MAX];
-	char *sysinclude[] = {
+	char **bp, *p, file[FILENAME_MAX], path[FILENAME_MAX];
+	static char *sysinclude[] = {
 		PREFIX"/include/",
 		PREFIX"/local/include/",
 		NULL
@@ -455,193 +348,199 @@
 
 	if (cppoff)
 		return;
-	if ((c = *s++) == '>')
-		delim = '>';
-	else if (c == '"')
-		delim = '"';
-	else
+	switch (*yytext) {
+	case '<':
+		if ((p = strchr(input->begin, '>')) == NULL)
+			goto bad_include;
+		*p = '\0';
+		if (p - input->begin >= FILENAME_MAX)
+			goto too_long;
+		strcpy(file, input->begin);
+		input->begin = input->p = p+1;
+		next();
+		break;
+	case '"':
+		if ((p = strchr(yytext + 1, '"')) == NULL)
+			goto bad_include;
+		*p = '\0';
+		if (p - yytext + 1 >= FILENAME_MAX)
+			goto too_long;
+		strcpy(file, yytext + 1);
+		next();
+		if (addinput(file))
+			return;
+		break;
+	default:
 		goto bad_include;
+	}
 
-	if (!string(&s, &file, delim))
-		goto bad_include;
-	if (delim == '"' && addinput(file, NULL, NULL))
-		return;
-
 	filelen = strlen(file);
 	for (bp = sysinclude; *bp; ++bp) {
 		dirlen = strlen(*bp);
-		if (dirlen + filelen > FILENAME_MAX)
+		if (dirlen + filelen > FILENAME_MAX-1)
 			continue;
 		memcpy(path, *bp, dirlen);
 		memcpy(path+dirlen, file, filelen);
-		if (addinput(path, NULL, NULL))
+		if (addinput(path))
 			break;
 	}
 	if (*bp)
-		printerr("included file '%s' not found", file);
-	cleanup(s);
+		error("included file '%s' not found", file);
+
 	return;
 
 bad_include:
-	printerr("#include expects \"FILENAME\" or <FILENAME>");
+	error("#include expects \"FILENAME\" or <FILENAME>");
+too_long:
+	error("#include FILENAME too long");
 }
 
 static void
-line(char *s)
+line(void)
 {
-	char *file;
+	char *file, *p;
+	Type *tp;
 	long n;
 
 	if (cppoff)
 		return;
-	if ((n = strtol(s, &s, 10)) <= 0 || n > USHRT_MAX) {
-		printerr("first parameter of #line is not a positive integer");
+	if ((n = strtol(input->p, &input->p, 10)) <= 0 || n > USHRT_MAX)
+		error("first parameter of #line is not a positive integer");
+
+	if (yytoken != CONSTANT || yylval.sym->type != inttype)
+		error("first parameter of #line is not a positive integer");
+
+	input->nline = yylval.sym->u.i;
+	next();
+	if (yytoken == EOFTOK)
 		return;
-	}
 
-	switch (*s) {
-	case ' ':
-	case '\t':
-		while (isspace(*s))
-			++s;
-		if (*s == '\0')
-			goto end_string;
-		if (*s++ != '"' && !string(&s, &file, '"'))
-			goto bad_file;
-		setfname(file);
-	case '\0':
-	end_string:
-		setfline(n-1);
-		break;;
-	default:
-	bad_file:
-		printerr("second parameter of #line is not a valid filename");
-		break;
-	}
-	cleanup(s);
+	tp = yylval.sym->type;
+	if (yytoken != CONSTANT || tp->op != ARY && tp->type != chartype)
+		error("second parameter of #line is not a valid filename");
+	free(input->fname);
+	input->fname = xstrdup(yylval.sym->u.s);
 }
 
 static void
-pragma(char *s)
+pragma(void)
 {
 	if (cppoff)
 		return;
+	/* TODO: discard input */
 }
 
 static void
-usererr(char *s)
+usererr(void)
 {
 	if (cppoff)
 		return;
-	printerr("#error %s", s);
-	exit(1);
+	printerr("#error %s", input->p);
+	/* TODO: discard input */
 }
 
 static void
-ifclause(char *s, int isdef)
+ifclause(int isdef)
 {
 	Symbol *sym;
-	unsigned n = cppctx++;
+	unsigned n;
 
-	if (cppctx == NR_COND-1) {
-		printerr("too much nesting levels of conditional inclusion");
-		return;
+	if (cppctx == NR_COND-1)
+		error("too much nesting levels of conditional inclusion");
+	n = cppctx++
+	if (yytoken != IDEN) {
+		error("no macro name given in #%s directive",
+		      (isdef) ? "ifdef" : "ifndef");
 	}
-	if (!iden(&s)) {
-		printerr("no macro name given in #%s directive",
-		         (isdef) ? "ifdef" : "ifndef");
-		return;
-	}
+
 	sym = lookup(NS_CPP);
+	next();
 	if (!(ifstatus[n] = (sym->flags & ISDEFINED) != 0 == isdef))
 		++cppoff;
-	cleanup(s);
 }
 
 static void
-ifdef(char *s)
+ifdef(void)
 {
-	ifclause(s, 1);
+	ifclause(1);
 }
 
 static void
-ifndef(char *s)
+ifndef(void)
 {
-	ifclause(s, 0);
+	ifclause(0);
 }
 
 static void
-endif(char *s)
+endif(void)
 {
-	if (cppctx == 0) {
-		printerr("#endif without #if");
-		return;
-	}
+	if (cppctx == 0)
+		error("#endif without #if");
+
 	if (!ifstatus[--cppctx])
 		--cppoff;
-	cleanup(s);
 }
 
 static void
-elseclause(char *s)
+elseclause(void)
 {
 	struct ifstatus *ip;
 
-	if (cppctx == 0) {
-		printerr("#else without #ifdef/ifndef");
-		return;
-	}
+	if (cppctx == 0)
+		error("#else without #ifdef/ifndef");
+
 	cppoff += (ifstatus[cppctx-1] ^= 1) ? -1 : 1;
-	cleanup(s);
 }
 
 static void
-undef(char *s)
+undef(void)
 {
 	Symbol *sym;
 
-	if (!iden(&s)) {
-		printerr("no macro name given in #undef directive");
+	if (cppoff)
 		return;
+	if (yytoken != IDEN) {
+		error("no macro name given in #undef directive");
+		return;
 	}
 	sym = lookup(NS_CPP);
 	sym->flags &= ~ISDEFINED;
-	cleanup(s);
 }
 
 bool
-cpp(char *s)
+cpp(void)
 {
 	static struct {
-		char *name;
-		void (*fun)(char *);
-	} *bp, cmds[] =  {
-		"define", define,
-		"include", include,
-		"ifdef", ifdef,
-		"ifndef", ifndef,
-		"endif", endif,
-		"else", elseclause,
-		"undef", undef,
-		"line", line,
-		"pragma", pragma,
-		"error", usererr,
-		NULL, NULL
+		uint8_t tok;
+		void (*fun)(void);
+	} *bp, clauses [] = {
+		{DEFINE, define},
+		{INCLUDE, include},
+		{LINE, line},
+		{IFDEF, ifdef},
+		{IFNDEF, ifndef},
+		{ELSE, elseclause},
+		{UNDEF, undef},
+		{PRAGMA, pragma},
+		{ERROR, usererr},
+		{0, NULL}
 	};
 
-	if (*s++ != '#')
+	if (*input->p != '#')
 		return 0;
-	while (isspace(*s))
-		++s;
-	if (!iden(&s))
-		goto incorrect;
-	for (bp = cmds; bp->name; ++bp) {
-		if (strcmp(bp->name, yytext))
-			continue;
-		(*bp->fun)(s);
-		return 1;
-	}
-incorrect:
-	printerr("invalid preprocessor directive #%s", yytext);
+	++input->p;
+	lexmode = CPPMODE;
+	setnamespace(NS_CPPCLAUSES);
+	next();
+	for (bp = clauses; bp->tok && bp->tok != yytoken; ++bp)
+		/* nothing */;
+	if (!bp->tok)
+		error("incorrect preprocessor directive");
+	next();
+	(*bp->fun)();
+
+	if (yytoken != EOFTOK && !cppoff)
+		error("trailing characters after preprocessor directive");
+	lexmode = CCMODE;
 	return 1;
 }
--- a/cc1/error.c
+++ b/cc1/error.c
@@ -19,8 +19,9 @@
 		return;
 	if (flag < 0)
 		failure = 1;
-	fprintf(stderr, "%s:%s:%u: ",
-		(flag < 0) ? "error" : "warning", getfname(), getfline());
+	fprintf(stderr, "%s:%u: %s: ",
+	       input->fname, input->nline,
+	       (flag < 0) ? "error" : "warning");
 	vfprintf(stderr, fmt, va);
 	putc('\n', stderr);
 	if (flag < 0 && nerrors++ == MAXERRNUM) {
--- a/cc1/lex.c
+++ b/cc1/lex.c
@@ -11,38 +11,27 @@
 #include "../inc/cc.h"
 #include "cc1.h"
 
-typedef struct input Input;
-
-struct input {
-	char *fname;
-	unsigned short nline;
-	FILE *fp;
-	char *line, *begin, *p;
-	Symbol *macro;
-	struct input *next;
-};
-
 unsigned yytoken;
 struct yystype yylval;
 char yytext[STRINGSIZ+3];
 unsigned short yylen;
 int cppoff;
+int lexmode = CCMODE;
 
 static unsigned lex_ns = NS_IDEN;
 static int safe, eof;
-static Input *input;
+Input *input;
 
 static void
-allocinput(char *fname, FILE *fp, char *buff)
+allocinput(char *fname, FILE *fp)
 {
 	Input *ip;
 
 	ip = xmalloc(sizeof(Input));
-	ip->fname = fname;
+	ip->fname = xstrdup(fname);
+	ip->p = ip->begin = ip->line = xmalloc(INPUTSIZ);
+	ip->nline = 0;
 	ip->next = input;
-	ip->macro = NULL;
-	ip->begin = ip->line = buff;
-	ip->nline = (fp) ? 0 : input->nline;
 	ip->fp = fp;
 	input = ip;
 }
@@ -52,10 +41,6 @@
 {
 	FILE *fp;
 
-	/*
-	 * we can use static file names because this Input is not going
-	 * to be freed ever
-	 */
 	if (!fname) {
 		fp = stdin;
 		fname = "<stdin>";
@@ -64,38 +49,18 @@
 			die("error opening output:%s", strerror(errno));
 		fname = fname;
 	}
-	allocinput(fname, fp, xmalloc(INPUTSIZ));
+	allocinput(fname, fp);
 	*input->begin = '\0';
 }
 
 bool
-addinput(char *fname, Symbol *sym, char *str)
+addinput(char *fname)
 {
 	FILE *fp;
-	char flags = 0;
 
-	if (fname) {
-		/*
-		 * this call comes from an include clause, so we reuse
-		 * the buffer from the calling Input
-		 */
-		if ((fp = fopen(fname, "r")) == NULL)
-			return 0;
-		fname = xstrdup(fname);
-		str = input->line;
-		*str = '\0';
-	} else {
-		/*
-		 * This call comes from a macro expansion, so we have
-		 * to duplicate the input string because it is the
-		 * expansion of the macro in a temporal buffer
-		 */
-		fname = input->fname;
-		fp = NULL;
-		str = xstrdup(str);
-	}
-	allocinput(fname, fp, str);
-	input->macro = sym;
+	if ((fp = fopen(fname, "r")) == NULL)
+		return 0;
+	allocinput(fname, fp);
 	return 1;
 }
 
@@ -102,57 +67,26 @@
 static void
 delinput(void)
 {
-	Input *ip;
+	Input *ip = input;
 
-repeat:
-	if (input->fp) {
-		/* include input */
-		if (fclose(input->fp))
-			die("error reading from input file '%s'", input->fname);
-		if (!input->next) {
-			eof = 1;
-			return;
-		}
-		free(input->fname);
-	} else {
-		/* macro input */
-		free(input->line);
-	}
-	ip = input;
-	input = input->next;
-	free(ip);
-
-	if (*input->begin != '\0')
+	if (!ip->next)
+		eof = 1;
+	if (fclose(ip->fp))
+		die("error reading from input file '%s'", ip->fname);
+	if (eof)
 		return;
-	if (!input->fp)
-		goto repeat;
+	input = ip->next;
+	free(ip->fname);
+	free(ip->line);
 }
 
-void
-setfname(char *name)
+static void
+newline(void)
 {
-	free(input->fname);
-	input->fname = xstrdup(name);
+	if (++input->nline == 0)
+		die("error:input file '%s' too long", input->fname);
 }
 
-char *
-getfname(void)
-{
-	return input->fname;
-}
-
-void
-setfline(unsigned short line)
-{
-	input->nline = line;
-}
-
-unsigned short
-getfline(void)
-{
-	return input->nline;
-}
-
 static char
 readchar(void)
 {
@@ -160,22 +94,25 @@
 	FILE *fp;
 
 repeat:
-	if (feof(input->fp))
-		delinput();
-	if (eof)
-		return '\0';
 	fp = input->fp;
 
-	if ((c = getc(fp)) == '\\') {
-		if ((c = getc(fp)) == '\n')
+	switch (c = getc(fp)) {
+	case EOF:
+		c = '\0';
+		break;
+	case '\\':
+		if ((c = getc(fp)) == '\n') {
+			newline();
 			goto repeat;
+		}
 		ungetc(c, fp);
 		c = '\\';
-	} else if (c == EOF) {
-		c = '\n';
-	} else if (c == '\n' && ++input->nline == 0) {
-		die("error:input file '%s' too long", getfname());
+		break;
+	case '\n':
+		newline();
+		break;
 	}
+
 	return c;
 }
 
@@ -203,10 +140,17 @@
 	char *bp, *lim;
 	char c, peekc = 0;
 
+repeat:
+	input->begin = input->p = input->line;
+	*input->line = '\0';
 	if (eof)
 		return 0;
+	if (feof(input->fp)) {
+		delinput();
+		goto repeat;
+	}
 	lim = &input->line[INPUTSIZ-1];
-	for (bp = input->line; bp != lim; *bp++ = c) {
+	for (bp = input->line; bp < lim; *bp++ = c) {
 		c = (peekc) ? peekc : readchar();
 		peekc = 0;
 		if (c == '\n' || c == '\0')
@@ -222,8 +166,10 @@
 		}
 	}
 
-	if (bp == lim)
-		error("line %u too big in file '%s'", getfline(), getfname());
+	if (bp == lim) {
+		error("line %u too big in file '%s'",
+		      input->line, input->fname);
+	}
 	*bp = '\0';
 	return 1;
 }
@@ -231,24 +177,18 @@
 bool
 moreinput(void)
 {
-	char *p;
-
 repeat:
-	if (!input->fp)
-		delinput();
-	if (*input->begin)
-		return 1;
 	if (!readline())
 		return 0;
-	p = input->line;
-	while (isspace(*p))
-		++p;
-	if (*p == '\0' || cpp(p) || cppoff) {
+	while (isspace(*input->p))
+		++input->p;
+	input->begin = input->p;
+	if (*input->p == '\0' || cpp() || cppoff) {
 		*input->begin = '\0';
 		goto repeat;
 	}
 
-	input->p = input->begin = p;
+	input->begin = input->p;
 	return 1;
 }
 
@@ -403,7 +343,9 @@
 
 	*bp++ = '"';
 repeat:
-	for (++input->p; (c = *input->p) != '\0' && c != '"'; ++input->p) {
+	for (++input->p; (c = *input->p) != '"'; ++input->p) {
+		if (c == '\0')
+			error("missing terminating '\"' character");
 		if (c == '\\')
 			c = escape();
 		if (bp == &yytext[STRINGSIZ+1])
@@ -410,9 +352,8 @@
 			error("string too long");
 		*bp++ = c;
 	}
-	if (c == '\0')
-		error("missing terminating '\"' character");
-	input->begin = input->p + 1;
+
+	input->begin = ++input->p;
 	if (ahead() == '"')
 		goto repeat;
 	*bp = '\0';
@@ -430,15 +371,16 @@
 iden(void)
 {
 	Symbol *sym;
-	char *p, *t, c;
+	char *p, *begin;
 
-	for (p = input->p; isalnum(*p) || *p == '_'; ++p)
+	begin = input->p;
+	for (p = begin; isalnum(*p) || *p == '_'; ++p)
 		/* nothing */;
 	input->p = p;
 	tok2str();
 	yylval.sym = sym = lookup(lex_ns);
 	if (sym->ns == NS_CPP) {
-		if (!disexpand && sym != input->macro && expand(sym))
+		if (!disexpand && expand(begin, sym))
 			return next();
 		/*
 		 * it is not a correct macro call, so try to find
@@ -554,17 +496,15 @@
 static void
 skipspaces(void)
 {
-	char *p;
-
 repeat:
-	for (p = input->begin; isspace(*p); ++p)
-		/* nothing */;
-	if (*p == '\0') {
+	while (isspace(*input->p))
+		++input->p;
+	if (*input->p == '\0' && lexmode != CPPMODE) {
 		if (!moreinput())
 			return;
 		goto repeat;
 	}
-	input->begin = input->p = p;
+	input->begin = input->p;
 }
 
 unsigned
@@ -573,14 +513,15 @@
 	char c;
 
 	skipspaces();
-	if (eof) {
-		if (cppctx)
-			error("#endif expected");
+	c = *input->begin;
+	if ((eof || lexmode == CPPMODE) && c == '\0') {
 		strcpy(yytext, "<EOF>");
-		return yytoken = EOFTOK;
+		if (cppctx && eof)
+			error("#endif expected");
+		yytoken = EOFTOK;
+		goto exit;
 	}
 
-	c = *input->begin;
 	if (isalpha(c) || c == '_')
 		yytoken = iden();
 	else if (isdigit(c))
@@ -592,8 +533,8 @@
 	else
 		yytoken = operator();
 
-	fputs(yytext, stderr);
-	putc('\n', stderr);
+exit:
+	fprintf(stderr, "%s\n", yytext);
 	lex_ns = NS_IDEN;
 	return yytoken;
 }
--- a/cc1/symbol.c
+++ b/cc1/symbol.c
@@ -191,7 +191,7 @@
 	static struct {
 		char *str;
 		unsigned char token, value;
-	} *bp, buff[] = {
+	} *bp, keywords[] = {
 		{"auto", SCLASS, AUTO},
 		{"break", BREAK, BREAK},
 		{"_Bool", TYPE, BOOL},
@@ -227,14 +227,32 @@
 		{"volatile", TQUALIFIER, VOLATILE},
 		{"while", WHILE, WHILE},
 		{NULL, 0, 0},
-	};
+	}, cppclauses[] = {
+		{"define", DEFINE, DEFINE},
+		{"include", INCLUDE, INCLUDE},
+		{"line", LINE, LINE},
+		{"ifdef", IFDEF, IFDEF},
+		{"else", ELSE, ELSE},
+		{"ifndef", IFNDEF, IFNDEF},
+		{"undef", UNDEF, UNDEF},
+		{"pragma", PRAGMA, PRAGMA},
+		{"error", ERROR, ERROR}
+	}, *list[] = {
+		keywords,
+		cppclauses,
+		NULL
+	}, **lp;
 	Symbol *sym;
+	int ns = NS_KEYWORD;
 
-	for (bp = buff; bp->str; ++bp) {
-		strcpy(yytext, bp->str);
-		sym = lookup(NS_KEYWORD);
-		sym->token = bp->token;
-		sym->u.token = bp->value;
+	for (lp = list; *lp; ++lp) {
+		for (bp = *lp; bp->str; ++bp) {
+			strcpy(yytext, bp->str);
+			sym = lookup(ns);
+			sym->token = bp->token;
+			sym->u.token = bp->value;
+		}
+		ns = NS_CPPCLAUSES;
 	}
 	globalcnt = 0;
 }