shithub: scc

Download patch

ref: 4e05c8d89c382a02992de4fad24c034d333bec2c
parent: d0b5bbca0d530653369c8cc94214fe44cc2bfad1
parent: e1b218ec5e6cc2745a43c5244bf6c2c0481b3e07
author: Roberto E. Vargas Caballero <[email protected]>
date: Fri Jul 17 15:36:19 EDT 2015

Merge remote-tracking branch 'kcc/master'

--- a/README
+++ b/README
@@ -7,12 +7,12 @@
 
 After a lot of years seeing compilers for z80 I notice that it is very
 hard for a C compiler generates a good code for a 8 bit processor like
-z80, with a expensive indirect addressing mode, so the best solution
+z80, with an expensive indirect addressing mode, so the best solution
 is to not try that compiler optimize the code and use C as a macro
 macro assembler.
 
-In order to get this last, we need a compiler that does exactly the
-things we want to do. For example is a variable is register it MUST be
+In order to reach this target, we need a compiler that does exactly the
+things the programmer wants to do. For example, if a variable is register it MUST be
 register and fails in other case. If a variable is automatic try to
 realize operations directly with the stack (for example use ADD
 A,(IX+4), instead of allocate the variable into register add and store
@@ -20,7 +20,7 @@
 big bullshit or you need it for recursion (static variables are your
 friends). 
 
-This is the reason why I begin to develop this compiler, and I hope
+This is the reason why I began to develop this compiler, and I hope
 it will be useful for you.
 
 Changes from standard C
@@ -37,7 +37,7 @@
 	- const: The definition of const is not clear in the standard.
 	  If a const value is modified then the behaviour is implementation
 	  defined. It seems that it was defined more in order to can
-	  allocate variables in ROM that for the error detection. This
+	  allocate variables in ROM than for the error detection. This
 	  implememtation will not warn about these modifications and
 	  the code will use them as normal variables (standard specifies
 	  that a diagnosis message must be printed).
@@ -47,17 +47,18 @@
 	  variable', which of course depend of the king of optimizations
 	  applied to the variable. This qualifier was added to the standard
 	  to can deal with longjmp (local variables that are not volatile
-	  have undefined state), but this can achieved with special pragma
-	  values, and for memory mapped registers or variables whose
-	  value is modified asynchronous. In the first case, this is a
-	  a non portable code by definition (depend of the register mapped),
-	  so it is better to deal with it using another solution (compiler
-	  extensions or directly assembler), and in the second case it
-	  generated a lot of problems with moderm processors out of order
-	  and multiprocesor, where not hold the value in a register is
-	  good enough (it is needed a explicit memory barrier).
+	  have undefined state), and for memory mapped registers or variables
+	  whose value is modified asynchronously, but this can be achieved with
+	  special pragma values.
+	  In the first case, this is non-portable code by definition
+	  (it depends on the register mapped), so it is better to deal with
+	  it using another solution (compiler extensions or directly
+	  assembler), and in the second case it generates a lot of
+	  problems with modern out-of-order and multiprocessor systems,
+	  where not holding the value in a register is not enough (an
+	  explicit memory barrier is needed).
 
-	- restricted: This qualifer can be only applied to pointers, to
+	- restrict: This qualifier can only be applied to pointers, to
 	  mark that the pointed object has no other alias. This qualifer
 	  was introduced to can fix some performance problems in numerical
 	  algorithm, where FORTRAN can achieve a better performance (and
--- a/cc1/cc1.h
+++ b/cc1/cc1.h
@@ -11,7 +11,9 @@
 typedef struct symbol Symbol;
 typedef struct caselist Caselist;
 typedef struct node Node;
+typedef struct input Input;
 
+
 struct type {
 	unsigned char op;           /* type builder operator */
 	unsigned char ns;
@@ -70,6 +72,14 @@
 	unsigned char token;
 };
 
+struct input {
+	char *fname;
+	void *fp;
+	char *line, *begin, *p;
+	struct input *next;
+	unsigned short nline;
+};
+
 /*
  * Definition of enumerations
  */
@@ -96,6 +106,7 @@
 	NS_LABEL,
 	NS_CPP,
 	NS_KEYWORD,
+	NS_CPPCLAUSES,
 	NS_STRUCTS
 };
 
@@ -111,6 +122,13 @@
 	ISEXTERN   =128
 };
 
+
+/* lexer mode, compiler or preprocessor directive */
+enum {
+	CCMODE,
+	CPPMODE
+};
+
 /* input tokens */
 enum tokens {
 	TQUALIFIER = 128,
@@ -177,6 +195,14 @@
 	CONTINUE,
 	BREAK,
 	RETURN,
+	DEFINE,
+	INCLUDE,
+	LINE,
+	PRAGMA,
+	ERROR,
+	IFDEF,
+	IFNDEF,
+	UNDEF,
 	EOFTOK
 };
 
@@ -275,11 +301,7 @@
 extern bool moreinput(void);
 extern void expect(unsigned tok);
 extern void discard(void);
-extern char *getfname(void);
-extern unsigned short getfline(void);
-extern void setfname(char *name);
-extern void setfline(unsigned short line);
-extern bool addinput(char *fname, Symbol *sym, char *str);
+extern bool addinput(char *fname);
 extern void setnamespace(int ns);
 extern void setsafe(int type);
 extern void ilex(char *fname);
@@ -298,8 +320,8 @@
 
 /* cpp.c */
 extern void icpp(void);
-extern bool cpp(char *s);
-extern int expand(Symbol *sym);
+extern bool cpp(void);
+extern bool expand(char *begin, Symbol *sym);
 
 /*
  * Definition of global variables
@@ -310,6 +332,8 @@
 extern unsigned short yylen;
 extern int cppoff, disexpand;
 extern unsigned cppctx;
+extern Input *input;
+extern int lexmode;
 
 extern Type *voidtype, *pvoidtype, *booltype,	
             *uchartype,   *chartype,
--- a/cc1/cpp.c
+++ b/cc1/cpp.c
@@ -19,7 +19,7 @@
 static unsigned arglen;
 static Symbol *symline, *symfile;
 static unsigned char ifstatus[NR_COND];
-static int paramerr;
+static Type *charptype;
 
 unsigned cppctx;
 int disexpand;
@@ -56,87 +56,26 @@
 	symfile = defmacro("__FILE__");
 }
 
-static bool
-iden(char **str)
-{
-	char c, *bp, *s = *str;
-
-	if (!isalpha(c = *s) && c != '_')
-		return 0;
-	for (bp = yytext; bp < &yytext[IDENTSIZ]; *bp++ = c) {
-		if ((c = *s) == '\0' || !isalnum(c) && c != '_')
-			break;
-		++s;
-	}
-	if (bp == &yytext[IDENTSIZ]) {
-		printerr("identifier too long in preprocessor");
-		return 0;
-	}
-	*bp = '\0';
-
-	while (isspace(*s))
-		++s;
-
-	*str = s;
-	return 1;
-}
-
-static bool
-string(char **input, char **str, char delim)
-{
-	char c, *s = *input;
-
-	if (str)
-		*str = s;
-
-	while ((c = *s) && c != delim)
-		++s;
-	if (c == '\0')
-		return 0;
-	*s++ = '\0';
-	*input = s;
-
-	return 1;
-}
-
 static void
-cleanup(char *s)
-{
-	while (isspace(*s))
-		++s;
-	if (*s != '\0')
-		printerr("trailing characters after preprocessor directive");
-}
-
-static void
 nextcpp(void)
 {
-	next();
-	if (yytoken == EOFTOK) {
-		printerr("unterminated argument list invoking macro \"%s\"",
-		      macroname);
-		goto mark_error;
-	}
-	if (yylen + 1 > arglen) {
-		printerr("argument overflow invoking macro \"%s\"",
-		      macroname);
-		goto mark_error;
-	}
-	memcpy(argp, yytext, yylen);
-	argp += yylen;
-	*argp++ = ' ';
-	arglen -= yylen + 1;
-	return;
-
-mark_error:
-	paramerr = 1;
-	yytoken = 0;
+        next();
+        if (yytoken == EOFTOK)
+		error("unterminated argument list invoking macro \"%s\"",
+                      macroname);
+        if (yylen + 1 > arglen)
+                error("argument overflow invoking macro \"%s\"",
+                      macroname);
+        memcpy(argp, yytext, yylen);
+        argp += yylen;
+        *argp++ = ' ';
+        arglen -= yylen + 1;
 }
 
 static void
 paren(void)
 {
-	while (!paramerr) {
+	for (;;) {
 		nextcpp();
 		switch (yytoken) {
 		case ')':
@@ -151,7 +90,7 @@
 static void
 parameter(void)
 {
-	while (!paramerr) {
+	for (;;) {
 		nextcpp();
 		switch (yytoken) {
 		case ')':
@@ -172,90 +111,55 @@
 	int n;
 
 	if (nargs == -1)
-		return 1;
-
-	if (ahead() != '(')
+		return -1;
+	if (ahead() != '(' && nargs > 0)
 		return 0;
 
 	disexpand = 1;
 	next();
-	paramerr = n = 0;
+	n = 0;
 	argp = buffer;
 	arglen = INPUTSIZ;
-	if (ahead() != ')') {
+	if (yytoken != ')') {
 		do {
 			*listp++ = argp;
 			parameter();
-		} while (!paramerr && ++n < NR_MACROARG && yytoken == ',');
+		} while (++n < NR_MACROARG && yytoken == ',');
 	}
+	if (yytoken != ')')
+		error("incorrect macro function alike invocation");
 	disexpand = 0;
 
-	if (paramerr)
-		return -1;
-	if (n == NR_MACROARG) {
-		printerr("too much parameters in macro \"%s\"", macroname);
-		return -1;
-	}
+	if (n == NR_MACROARG)
+		error("too much parameters in macro \"%s\"", macroname);
 	if (n != nargs) {
-		printerr("macro \"%s\" passed %d arguments, but it takes %d",
+		error("macro \"%s\" passed %d arguments, but it takes %d",
 		      macroname, n, nargs);
-		return -1;
 	}
 
 	return 1;
 }
 
-/*
- * sym->u.s is a string with the following format:
- * 	dd#string
- * where dd is the number of arguments of the macro
- * (-1 if it is a macro without arguments), and string
- * is the macro definition, where @dd@ indicates the
- * parameter number dd
- */
-#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? INPUTSIZ : FILENAME_MAX+2)
-int
-expand(Symbol *sym)
+static void
+copymacro(char *bp, char *s, size_t bufsiz, char *arglist[])
 {
-	unsigned len;
-	int r, n;
-	char *arglist[NR_MACROARG], arguments[INPUTSIZ], buffer[BUFSIZE];
-	char prevc, c, *bp, *lim, *arg, *s = sym->u.s;
+	char prevc, c, *arg;
 
-	fprintf(stderr, "macro %s:%s\n", sym->name, sym->u.s);
-	if (sym == symfile) {
-		sprintf(buffer, "\"%s\"", getfname());
-		goto add_macro;
-	}
-	if (sym == symline) {
-		sprintf(buffer, "%d", getfline());
-		goto add_macro;
-	}
-
-	macroname = sym->name;
-	if ((r = parsepars(arguments, arglist, atoi(s))) < 1)
-		return r;
-
-	for (n = 0; n < atoi(s); ++n)
-		fprintf(stderr, "PAR%d:%s\n", n, arglist[n]);
-
-	len = INPUTSIZ-1;
-	bp = buffer;
-	for (prevc = '\0', s += 3; c = *s; prevc = c, ++s) {
+	for (prevc = '\0'; c = *s; prevc = c, ++s) {
 		if (c != '@') {
 			if (c == '#')
 				continue;
-			if (len-- == 0)
+			if (bufsiz-- == 0)
 				goto expansion_too_long;
 			*bp++ = c;
 		} else {
-			unsigned size;
+			size_t size;
 
 			if (prevc == '#')
-				len -= 2;
+				bufsiz -= 2;
 			arg = arglist[atoi(++s)];
 			size = strlen(arg);
-			if (size > len)
+			if (size > bufsiz)
 				goto expansion_too_long;
 			if (prevc == '#')
 				*bp++ = '"';
@@ -263,190 +167,179 @@
 			bp += size;
 			if (prevc == '#')
 				*bp++ = '"';
-			len -= size;
+			bufsiz -= size;
 			s += 2;
 		}
 	}
-	*bp = '\0';
-	fprintf(stderr, "macro expanded:%s\n", buffer);
-add_macro:
-	addinput(NULL, sym, buffer);
-	return 1;
+        *bp = '\0';
 
+	return;
+
 expansion_too_long:
-	printerr("expansion of macro \"%s\" is too long", macroname);
-	return -1;
+	error("expansion of macro \"%s\" is too long", macroname);
 }
-#undef BUFSIZE
 
-/*
- * Parse an argument list (par0, par1, ...) and creates
- * an array with pointers to all the arguments in the
- * list
- */
-static char *
-parseargs(char *s, char *args[NR_MACROARG], int *nargs)
+#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? INPUTSIZ : FILENAME_MAX+2)
+bool
+expand(char *begin, Symbol *sym)
 {
-	int n;
 	size_t len;
-	char *endp, c;
+	int n;
+	char *s = sym->u.s;
+	char *arglist[NR_MACROARG], arguments[INPUTSIZ], buffer[BUFSIZE];
 
-	n = -1;
-	if (*s != '(')
-		goto set_nargs;
-	n = 0;
-	while (isspace(*s++))
-		/* nothing */;
-	if (*s == ')')
-		goto set_nargs;
-
-	for (n = 1; n <= NR_MACROARG; ++n) {
-		while (isspace(*s))
-			++s;
-		if (!isalpha(*s) && *s != '_') {
-			printerr("macro arguments must be identifiers");
-			return NULL;
-		}
-		for (endp = s+1; isalnum(*endp) || *endp == '_'; ++endp)
-			/* nothing */;
-		if ((len = endp - s) > IDENTSIZ) {
-			printerr("macro argument too long");
-			return NULL;
-		}
-		*args++ = s;
-		for (s = endp; isspace(*s); ++s)
-			*s = '\0';
-		c = *s;
-		*s++ = '\0';
-		if (c == ')')
-			break;
-		if (c == ',') {
-			continue;
-		} else {
-			printerr("macro parameters must be comma-separated");
-			return NULL;
-		}
+	macroname = sym->name;	/* set first: print_subs and error paths read it */
+	fprintf(stderr, "macro '%s':%s\n", sym->name, sym->u.s);
+	if (sym == symfile) {
+		sprintf(buffer, "\"%s\"", input->fname);
+		goto print_subs;
 	}
-	if (n > NR_MACROARG) {
-		printerr("too much parameters in macro");
-		return NULL;
+	if (sym == symline) {
+		sprintf(buffer, "%d", input->nline);	/* line counter, not the buffer pointer */
+		goto print_subs;
 	}
 
-set_nargs:
-	*nargs = n;
-	return s;
+	if (!parsepars(arguments, arglist, atoi(s)))
+		return 0;
+	for (n = 0; n < atoi(s); ++n)
+		fprintf(stderr, "PAR%d:%s\n", n, arglist[n]);
+
+	copymacro(buffer, s+3, INPUTSIZ-1, arglist);	/* s+3 skips the "dd#" prefix */
+
+print_subs:
+	fprintf(stderr, "macro '%s' expanded to :'%s'\n", macroname, buffer);
+	len = strlen(buffer);
+
+	/* replace the invocation text [begin, input->p) by the expansion, */
+	/* moving the rest of the line (with its '\0') to make room for it */
+	if (len + strlen(input->p) >= (size_t) (&input->line[INPUTSIZ] - begin))
+		error("expansion of macro \"%s\" is too long", macroname);
+	memmove(begin + len, input->p, strlen(input->p) + 1);
+	memcpy(begin, buffer, len);
+	input->p = input->begin = begin;	/* rescan from the pasted text */
+
+	return 1;
 }
+#undef BUFSIZE
 
-/*
- * Copy a string define, and substitute formal arguments of the
- * macro into strings in the form @XX@, where XX is the position
- * of the argument in the argument list.
- */
-static bool
-copydefine(char *s, char *args[], char *buff, int bufsiz, int nargs)
+static int
+getpars(Symbol *args[NR_MACROARG])
 {
-	int n;
-	size_t ncopy;
-	char arroba[6], *p, **bp, c, prevc;
+	int n = -1;
+	char *err;
 
-	for (prevc = '\0'; c = *s++; prevc = c) {
-		if (!isalpha(c) && c != '_' || nargs < 1) {
-			if (bufsiz-- == 0)
-				goto too_long;
-			if (prevc == '#')
-				goto bad_stringer;
-			*buff++ = c;
-			if (c != '#')
-				continue;
-			while (isspace(*++s))
-				/* nothing */;
+	if (!accept('('))
+		return n;
+	++n;
+	if (accept(')'))
+		return n;
+
+	do {
+		if (n == NR_MACROARG) {
+			err = "too much parameters in macro";
+			goto popctx_and_error;
 		}
-		/* found an identifier, is it one of the macro arguments? */
-		for (p = s; isalnum(c = *p) || c == '_'; ++p)
-			/* nothing */;
-		ncopy = p - --s;
-		bp = args;
-		for (n = 0; n < nargs; ++n) {
-			if (strncmp(s, *bp++, ncopy))
-				continue;
-			sprintf(arroba, "@%02d@", n);
-			s = arroba, ncopy = 4;
-			break;
+		if (yytoken != IDEN) {
+			err = "macro arguments must be identifiers";
+			goto popctx_and_error;
 		}
-		if (n == nargs && prevc == '#')
-			goto bad_stringer;
-		if ((bufsiz -= ncopy) < 0)
-			goto too_long;
-		memcpy(buff, s, ncopy);
-		buff += ncopy, s = p;
-	}
-	if (bufsiz == 0)
-		goto too_long;
-	*buff = '\0';
-	return 1;
+		args[n++] = yylval.sym;
+		next();
+	} while (accept(','));
+	expect(')');
 
-bad_stringer:
-	printerr("'#' is not followed by a macro parameter");
-	return 0;
-too_long:
-	printerr("macro definition too long");
-	return 0;
+	return n;
+
+popctx_and_error:
+	popctx();
+	error(err);
 }
 
-static char *
-mkdefine(char *s)
+static void
+getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz)
 {
-	int nargs;
-	char *args[NR_MACROARG], buff[LINESIZ+1];
+	Symbol **argp;
+	char *err;
+	size_t len;
+	int prevc = 0, ispar;
 
-	if ((s = parseargs(s, args, &nargs)) == NULL)
-		return NULL;
-	sprintf(buff, "%02d#", nargs);
+	for (;;) {
+		ispar = 0;
+		if (yytoken == IDEN && nargs > 0) {	/* nargs is -1 for macros without '(' */
+			for (argp = args; argp < &args[nargs]; ++argp) {
+				if (*argp == yylval.sym)
+					break;
+			}
+			if (argp != &args[nargs]) {
+				sprintf(yytext, "@%02d@", (int) (argp - args));	/* parameter marker */
+				ispar = 1;
+			}
+		}
+		if (prevc == '#' && !ispar)
+			goto bad_stringer;
+		if (yytoken == EOFTOK)
+			break;
 
-	while (isspace(*s))
-		++s;
+		if ((len = strlen(yytext)) >= bufsiz) {
+			err = "too long macro";
+			goto popctx_and_error;
+		}
+		memcpy(bp, yytext, len);
+		bp += len;
+		bufsiz -= len;
+		if ((prevc = yytoken) != '#') {
+			--bufsiz;	/* account for the separator; bufsiz >= 1 here */
+			*bp++ = ' ';
+		}
+		next();
+	}
+	*bp = '\0';
+	return;
 
-	if (*s == '\0')
-		buff[0] = '\0';
-	else if (!copydefine(s, args, buff+3, LINESIZ-3, nargs))
-		return NULL;
-	return xstrdup(buff);
+bad_stringer:
+	err = "'#' is not followed by a macro parameter";
+popctx_and_error:
+	popctx();
+	error(err);
 }
 
 static void
-define(char *s)
+define(void)
 {
-	char *t;
-	Symbol *sym;
+	Symbol *sym,*args[NR_MACROARG];
+	char buff[LINESIZ+1];
+	int n;
 
 	if (cppoff)
 		return;
-	if (!iden(&s)) {
-		printerr("#define must have an identifier as parameter");
-		return;
-	}
-
-	for (t = s + strlen(s) + 1; isspace(*--t); *t = '\0')
-		/* nothing */;
-	if ((s = mkdefine(s)) == NULL)
-		return;
-
-	sym = lookup(NS_CPP);
+	if (yytoken != IDEN)
+		error("macro names must be identifiers");
+	sym = yylval.sym;
 	if ((sym->flags & ISDEFINED) && sym->ns == NS_CPP) {
 		warn("'%s' redefined", yytext);
 		free(sym->u.s);
+	} else if (sym->ns != NS_CPP) {
+		sym = lookup(NS_CPP);
 	}
 	sym->flags |= ISDEFINED;
-	sym->ns = NS_CPP;
-	sym->ctx = UCHAR_MAX;
-	sym->u.s = s;
+
+	pushctx();
+
+	next();
+	n = getpars(args);
+	sprintf(buff, "%02d#", n);
+	getdefs(args, n, buff+3, LINESIZ-3);
+	sym->u.s = xstrdup(buff);
+	fprintf(stderr, "Defining macro '%s'='%s'\n", sym->name, buff);
+
+	popctx();
 }
 
 static void
-include(char *s)
+include(void)
 {
-	char **bp, delim, c, *p, *file, path[FILENAME_MAX];
-	char *sysinclude[] = {
+	char **bp, *p, file[FILENAME_MAX], path[FILENAME_MAX];
+	static char *sysinclude[] = {
 		PREFIX"/include/",
 		PREFIX"/local/include/",
 		NULL
@@ -455,193 +348,199 @@
 
 	if (cppoff)
 		return;
-	if ((c = *s++) == '>')
-		delim = '>';
-	else if (c == '"')
-		delim = '"';
-	else
+	switch (*yytext) {
+	case '<':
+		if ((p = strchr(input->begin, '>')) == NULL)
+			goto bad_include;
+		*p = '\0';
+		if (p - input->begin >= FILENAME_MAX)
+			goto too_long;
+		strcpy(file, input->begin);
+		input->begin = input->p = p+1;
+		next();
+		break;
+	case '"':
+		if ((p = strchr(yytext + 1, '"')) == NULL)
+			goto bad_include;
+		*p = '\0';
+		if (p - (yytext + 1) >= FILENAME_MAX)	/* length of the name after the quote */
+			goto too_long;
+		strcpy(file, yytext + 1);
+		next();
+		if (addinput(file))
+			return;
+		break;
+	default:
 		goto bad_include;
+	}
 
-	if (!string(&s, &file, delim))
-		goto bad_include;
-	if (delim == '"' && addinput(file, NULL, NULL))
-		return;
-
 	filelen = strlen(file);
 	for (bp = sysinclude; *bp; ++bp) {
 		dirlen = strlen(*bp);
-		if (dirlen + filelen > FILENAME_MAX)
+		if (dirlen + filelen > FILENAME_MAX-1)
 			continue;
 		memcpy(path, *bp, dirlen);
-		memcpy(path+dirlen, file, filelen);
-		if (addinput(path, NULL, NULL))
+		memcpy(path+dirlen, file, filelen + 1);	/* +1 copies the '\0' terminator */
+		if (addinput(path))
 			break;
 	}
-	if (*bp)
-		printerr("included file '%s' not found", file);
-	cleanup(s);
+	if (!*bp)	/* list exhausted: no directory had the file */
+		error("included file '%s' not found", file);
+
 	return;
 
 bad_include:
-	printerr("#include expects \"FILENAME\" or <FILENAME>");
+	error("#include expects \"FILENAME\" or <FILENAME>");
+too_long:
+	error("#include FILENAME too long");
 }
 
 static void
-line(char *s)
+line(void)
 {
-	char *file;
+	Type *tp;
 	long n;
 
 	if (cppoff)
 		return;
-	if ((n = strtol(s, &s, 10)) <= 0 || n > USHRT_MAX) {
-		printerr("first parameter of #line is not a positive integer");
+	if (yytoken != CONSTANT || yylval.sym->type != inttype)
+		error("first parameter of #line is not a positive integer");
+	n = yylval.sym->u.i;	/* the number is the token cpp() already read */
+	if (n <= 0 || n > USHRT_MAX)
+		error("first parameter of #line is not a positive integer");
+
+	input->nline = n;	/* NOTE(review): the code this replaces stored n-1 - confirm */
+	next();
+	if (yytoken == EOFTOK)
 		return;
-	}
 
-	switch (*s) {
-	case ' ':
-	case '\t':
-		while (isspace(*s))
-			++s;
-		if (*s == '\0')
-			goto end_string;
-		if (*s++ != '"' && !string(&s, &file, '"'))
-			goto bad_file;
-		setfname(file);
-	case '\0':
-	end_string:
-		setfline(n-1);
-		break;;
-	default:
-	bad_file:
-		printerr("second parameter of #line is not a valid filename");
-		break;
-	}
-	cleanup(s);
+	if (yytoken != CONSTANT || (tp = yylval.sym->type)->op != ARY ||
+	    tp->type != chartype)
+		error("second parameter of #line is not a valid filename");
+	free(input->fname);
+	input->fname = xstrdup(yylval.sym->u.s);
+	next();	/* consume the filename so cpp() sees EOFTOK */
 }
 
 static void
-pragma(char *s)
+pragma(void)
 {
 	if (cppoff)
 		return;
+	/* TODO: discard input */
 }
 
 static void
-usererr(char *s)
+usererr(void)
 {
 	if (cppoff)
 		return;
-	printerr("#error %s", s);
-	exit(1);
+	printerr("#error %s", input->p);
+	/* TODO: discard input */
 }
 
 static void
-ifclause(char *s, int isdef)
+ifclause(int isdef)
 {
 	Symbol *sym;
-	unsigned n = cppctx++;
+	unsigned n;
 
-	if (cppctx == NR_COND-1) {
-		printerr("too much nesting levels of conditional inclusion");
-		return;
+	if (cppctx == NR_COND-1)
+		error("too much nesting levels of conditional inclusion");
+	n = cppctx++;	/* slot of this conditional in ifstatus[] */
+	if (yytoken != IDEN) {
+		error("no macro name given in #%s directive",
+		      (isdef) ? "ifdef" : "ifndef");
 	}
-	if (!iden(&s)) {
-		printerr("no macro name given in #%s directive",
-		         (isdef) ? "ifdef" : "ifndef");
-		return;
-	}
+
 	sym = lookup(NS_CPP);
+	next();	/* advance past the macro name */
 	if (!(ifstatus[n] = (sym->flags & ISDEFINED) != 0 == isdef))
 		++cppoff;
-	cleanup(s);
 }
 
 static void
-ifdef(char *s)
+ifdef(void)
 {
-	ifclause(s, 1);
+	ifclause(1);
 }
 
 static void
-ifndef(char *s)
+ifndef(void)
 {
-	ifclause(s, 0);
+	ifclause(0);
 }
 
 static void
-endif(char *s)
+endif(void)
 {
-	if (cppctx == 0) {
-		printerr("#endif without #if");
-		return;
-	}
+	if (cppctx == 0)
+		error("#endif without #if");
+
 	if (!ifstatus[--cppctx])
 		--cppoff;
-	cleanup(s);
 }
 
 static void
-elseclause(char *s)
+elseclause(void)
 {
 	struct ifstatus *ip;
 
-	if (cppctx == 0) {
-		printerr("#else without #ifdef/ifndef");
-		return;
-	}
+	if (cppctx == 0)
+		error("#else without #ifdef/ifndef");
+
 	cppoff += (ifstatus[cppctx-1] ^= 1) ? -1 : 1;
-	cleanup(s);
 }
 
 static void
-undef(char *s)
+undef(void)
 {
 	Symbol *sym;
 
-	if (!iden(&s)) {
-		printerr("no macro name given in #undef directive");
+	if (cppoff)
 		return;
+	if (yytoken != IDEN) {
+		error("no macro name given in #undef directive");
+		return;
 	}
 	sym = lookup(NS_CPP);
 	sym->flags &= ~ISDEFINED;
-	cleanup(s);
 }
 
 bool
-cpp(char *s)
+cpp(void)
 {
 	static struct {
-		char *name;
-		void (*fun)(char *);
-	} *bp, cmds[] =  {
-		"define", define,
-		"include", include,
-		"ifdef", ifdef,
-		"ifndef", ifndef,
-		"endif", endif,
-		"else", elseclause,
-		"undef", undef,
-		"line", line,
-		"pragma", pragma,
-		"error", usererr,
-		NULL, NULL
+		uint8_t tok;
+		void (*fun)(void);
+	} *bp, clauses [] = {
+		{DEFINE, define},
+		{INCLUDE, include},
+		{LINE, line},
+		{IFDEF, ifdef},
+		{IFNDEF, ifndef},
+		{ELSE, elseclause},
+		{UNDEF, undef},
+		{PRAGMA, pragma},
+		{ERROR, usererr},
+		{0, NULL}
 	};
 
-	if (*s++ != '#')
+	if (*input->p != '#')
 		return 0;
-	while (isspace(*s))
-		++s;
-	if (!iden(&s))
-		goto incorrect;
-	for (bp = cmds; bp->name; ++bp) {
-		if (strcmp(bp->name, yytext))
-			continue;
-		(*bp->fun)(s);
-		return 1;
-	}
-incorrect:
-	printerr("invalid preprocessor directive #%s", yytext);
+	++input->p;
+	lexmode = CPPMODE;
+	setnamespace(NS_CPPCLAUSES);
+	next();
+	for (bp = clauses; bp->tok && bp->tok != yytoken; ++bp)
+		/* nothing */;
+	if (!bp->tok)
+		error("incorrect preprocessor directive");
+	next();
+	(*bp->fun)();
+
+	if (yytoken != EOFTOK && !cppoff)
+		error("trailing characters after preprocessor directive");
+	lexmode = CCMODE;
 	return 1;
 }
--- a/cc1/error.c
+++ b/cc1/error.c
@@ -19,8 +19,9 @@
 		return;
 	if (flag < 0)
 		failure = 1;
-	fprintf(stderr, "%s:%s:%u: ",
-		(flag < 0) ? "error" : "warning", getfname(), getfline());
+	fprintf(stderr, "%s:%u: %s: ",
+	       input->fname, input->nline,
+	       (flag < 0) ? "error" : "warning");
 	vfprintf(stderr, fmt, va);
 	putc('\n', stderr);
 	if (flag < 0 && nerrors++ == MAXERRNUM) {
--- a/cc1/lex.c
+++ b/cc1/lex.c
@@ -11,38 +11,27 @@
 #include "../inc/cc.h"
 #include "cc1.h"
 
-typedef struct input Input;
-
-struct input {
-	char *fname;
-	unsigned short nline;
-	FILE *fp;
-	char *line, *begin, *p;
-	Symbol *macro;
-	struct input *next;
-};
-
 unsigned yytoken;
 struct yystype yylval;
 char yytext[STRINGSIZ+3];
 unsigned short yylen;
 int cppoff;
+int lexmode = CCMODE;
 
 static unsigned lex_ns = NS_IDEN;
 static int safe, eof;
-static Input *input;
+Input *input;
 
 static void
-allocinput(char *fname, FILE *fp, char *buff)
+allocinput(char *fname, FILE *fp)
 {
 	Input *ip;
 
 	ip = xmalloc(sizeof(Input));
-	ip->fname = fname;
+	ip->fname = xstrdup(fname);
+	ip->p = ip->begin = ip->line = xmalloc(INPUTSIZ);
+	ip->nline = 0;
 	ip->next = input;
-	ip->macro = NULL;
-	ip->begin = ip->line = buff;
-	ip->nline = (fp) ? 0 : input->nline;
 	ip->fp = fp;
 	input = ip;
 }
@@ -52,10 +41,6 @@
 {
 	FILE *fp;
 
-	/*
-	 * we can use static file names because this Input is not going
-	 * to be freed ever
-	 */
 	if (!fname) {
 		fp = stdin;
 		fname = "<stdin>";
@@ -64,38 +49,18 @@
 			die("error opening output:%s", strerror(errno));
 		fname = fname;
 	}
-	allocinput(fname, fp, xmalloc(INPUTSIZ));
+	allocinput(fname, fp);
 	*input->begin = '\0';
 }
 
 bool
-addinput(char *fname, Symbol *sym, char *str)
+addinput(char *fname)
 {
 	FILE *fp;
-	char flags = 0;
 
-	if (fname) {
-		/*
-		 * this call comes from an include clause, so we reuse
-		 * the buffer from the calling Input
-		 */
-		if ((fp = fopen(fname, "r")) == NULL)
-			return 0;
-		fname = xstrdup(fname);
-		str = input->line;
-		*str = '\0';
-	} else {
-		/*
-		 * This call comes from a macro expansion, so we have
-		 * to duplicate the input string because it is the
-		 * expansion of the macro in a temporal buffer
-		 */
-		fname = input->fname;
-		fp = NULL;
-		str = xstrdup(str);
-	}
-	allocinput(fname, fp, str);
-	input->macro = sym;
+	if ((fp = fopen(fname, "r")) == NULL)
+		return 0;
+	allocinput(fname, fp);
 	return 1;
 }
 
@@ -102,57 +67,26 @@
 static void
 delinput(void)
 {
-	Input *ip;
+	Input *ip = input;
 
-repeat:
-	if (input->fp) {
-		/* include input */
-		if (fclose(input->fp))
-			die("error reading from input file '%s'", input->fname);
-		if (!input->next) {
-			eof = 1;
-			return;
-		}
-		free(input->fname);
-	} else {
-		/* macro input */
-		free(input->line);
-	}
-	ip = input;
-	input = input->next;
-	free(ip);
-
-	if (*input->begin != '\0')
+	if (fclose(ip->fp))
+		die("error reading from input file '%s'", ip->fname);
+	eof = eof || ip->next == NULL;	/* the outermost input is kept, only closed */
+	if (eof)
 		return;
-	if (!input->fp)
-		goto repeat;
+	input = ip->next;
+	free(ip->fname);
+	free(ip->line);
+	free(ip);	/* release the node itself, as the replaced code did */
 }
 
-void
-setfname(char *name)
+static void
+newline(void)
 {
-	free(input->fname);
-	input->fname = xstrdup(name);
+	if (++input->nline == 0)
+		die("error:input file '%s' too long", input->fname);
 }
 
-char *
-getfname(void)
-{
-	return input->fname;
-}
-
-void
-setfline(unsigned short line)
-{
-	input->nline = line;
-}
-
-unsigned short
-getfline(void)
-{
-	return input->nline;
-}
-
 static char
 readchar(void)
 {
@@ -160,22 +94,25 @@
 	FILE *fp;
 
 repeat:
-	if (feof(input->fp))
-		delinput();
-	if (eof)
-		return '\0';
 	fp = input->fp;
 
-	if ((c = getc(fp)) == '\\') {
-		if ((c = getc(fp)) == '\n')
+	switch (c = getc(fp)) {
+	case EOF:
+		c = '\0';
+		break;
+	case '\\':
+		if ((c = getc(fp)) == '\n') {
+			newline();
 			goto repeat;
+		}
 		ungetc(c, fp);
 		c = '\\';
-	} else if (c == EOF) {
-		c = '\n';
-	} else if (c == '\n' && ++input->nline == 0) {
-		die("error:input file '%s' too long", getfname());
+		break;
+	case '\n':
+		newline();
+		break;
 	}
+
 	return c;
 }
 
@@ -203,10 +140,17 @@
 	char *bp, *lim;
 	char c, peekc = 0;
 
+repeat:
+	input->begin = input->p = input->line;
+	*input->line = '\0';
 	if (eof)
 		return 0;
+	if (feof(input->fp)) {
+		delinput();
+		goto repeat;
+	}
 	lim = &input->line[INPUTSIZ-1];
-	for (bp = input->line; bp != lim; *bp++ = c) {
+	for (bp = input->line; bp < lim; *bp++ = c) {
 		c = (peekc) ? peekc : readchar();
 		peekc = 0;
 		if (c == '\n' || c == '\0')
@@ -222,8 +166,10 @@
 		}
 	}
 
-	if (bp == lim)
-		error("line %u too big in file '%s'", getfline(), getfname());
+	if (bp == lim) {
+		error("line %u too big in file '%s'",
+		      input->nline, input->fname);	/* %u takes the line counter */
+	}
 	*bp = '\0';
 	return 1;
 }
@@ -231,24 +177,18 @@
 bool
 moreinput(void)
 {
-	char *p;
-
 repeat:
-	if (!input->fp)
-		delinput();
-	if (*input->begin)
-		return 1;
 	if (!readline())
 		return 0;
-	p = input->line;
-	while (isspace(*p))
-		++p;
-	if (*p == '\0' || cpp(p) || cppoff) {
+	while (isspace(*input->p))
+		++input->p;
+	input->begin = input->p;
+	if (*input->p == '\0' || cpp() || cppoff) {
 		*input->begin = '\0';
 		goto repeat;
 	}
 
-	input->p = input->begin = p;
+	input->begin = input->p;
 	return 1;
 }
 
@@ -403,7 +343,9 @@
 
 	*bp++ = '"';
 repeat:
-	for (++input->p; (c = *input->p) != '\0' && c != '"'; ++input->p) {
+	for (++input->p; (c = *input->p) != '"'; ++input->p) {
+		if (c == '\0')
+			error("missing terminating '\"' character");
 		if (c == '\\')
 			c = escape();
 		if (bp == &yytext[STRINGSIZ+1])
@@ -410,9 +352,8 @@
 			error("string too long");
 		*bp++ = c;
 	}
-	if (c == '\0')
-		error("missing terminating '\"' character");
-	input->begin = input->p + 1;
+
+	input->begin = ++input->p;
 	if (ahead() == '"')
 		goto repeat;
 	*bp = '\0';
@@ -430,15 +371,16 @@
 iden(void)
 {
 	Symbol *sym;
-	char *p, *t, c;
+	char *p, *begin;
 
-	for (p = input->p; isalnum(*p) || *p == '_'; ++p)
+	begin = input->p;
+	for (p = begin; isalnum(*p) || *p == '_'; ++p)
 		/* nothing */;
 	input->p = p;
 	tok2str();
 	yylval.sym = sym = lookup(lex_ns);
 	if (sym->ns == NS_CPP) {
-		if (!disexpand && sym != input->macro && expand(sym))
+		if (!disexpand && expand(begin, sym))
 			return next();
 		/*
 		 * it is not a correct macro call, so try to find
@@ -554,17 +496,15 @@
 static void
 skipspaces(void)
 {
-	char *p;
-
 repeat:
-	for (p = input->begin; isspace(*p); ++p)
-		/* nothing */;
-	if (*p == '\0') {
+	while (isspace(*input->p))
+		++input->p;
+	if (*input->p == '\0' && lexmode != CPPMODE) {
 		if (!moreinput())
 			return;
 		goto repeat;
 	}
-	input->begin = input->p = p;
+	input->begin = input->p;
 }
 
 unsigned
@@ -573,14 +513,15 @@
 	char c;
 
 	skipspaces();
-	if (eof) {
-		if (cppctx)
-			error("#endif expected");
+	c = *input->begin;
+	if ((eof || lexmode == CPPMODE) && c == '\0') {
 		strcpy(yytext, "<EOF>");
-		return yytoken = EOFTOK;
+		if (cppctx && eof)
+			error("#endif expected");
+		yytoken = EOFTOK;
+		goto exit;
 	}
 
-	c = *input->begin;
 	if (isalpha(c) || c == '_')
 		yytoken = iden();
 	else if (isdigit(c))
@@ -592,8 +533,8 @@
 	else
 		yytoken = operator();
 
-	fputs(yytext, stderr);
-	putc('\n', stderr);
+exit:
+	fprintf(stderr, "%s\n", yytext);
 	lex_ns = NS_IDEN;
 	return yytoken;
 }
--- a/cc1/symbol.c
+++ b/cc1/symbol.c
@@ -191,7 +191,7 @@
 	static struct {
 		char *str;
 		unsigned char token, value;
-	} *bp, buff[] = {
+	} *bp, keywords[] = {
 		{"auto", SCLASS, AUTO},
 		{"break", BREAK, BREAK},
 		{"_Bool", TYPE, BOOL},
@@ -227,14 +227,33 @@
 		{"volatile", TQUALIFIER, VOLATILE},
 		{"while", WHILE, WHILE},
 		{NULL, 0, 0},
-	};
+	}, cppclauses[] = {
+		{"define", DEFINE, DEFINE},
+		{"include", INCLUDE, INCLUDE},
+		{"line", LINE, LINE},
+		{"ifdef", IFDEF, IFDEF},
+		{"else", ELSE, ELSE},
+		{"ifndef", IFNDEF, IFNDEF},
+		{"undef", UNDEF, UNDEF},
+		{"pragma", PRAGMA, PRAGMA},
+		{"error", ERROR, ERROR},
+		{NULL, 0, 0}	/* sentinel: the install loop stops at str == NULL */
+	}, *list[] = {
+		keywords,
+		cppclauses,
+		NULL
+	}, **lp;
 	Symbol *sym;
+	int ns = NS_KEYWORD;
 
-	for (bp = buff; bp->str; ++bp) {
-		strcpy(yytext, bp->str);
-		sym = lookup(NS_KEYWORD);
-		sym->token = bp->token;
-		sym->u.token = bp->value;
+	for (lp = list; *lp; ++lp) {
+		for (bp = *lp; bp->str; ++bp) {
+			strcpy(yytext, bp->str);
+			sym = lookup(ns);
+			sym->token = bp->token;
+			sym->u.token = bp->value;
+		}
+		ns = NS_CPPCLAUSES;	/* every table after the first holds cpp clauses */
 	}
 	globalcnt = 0;
 }