shithub: scc

Download patch

ref: fc8190e4863a2d300391578be850614b7b3f4b9d
parent: c1c6db84269708d87a5b8d0e78225768c36b36ed
author: Roberto E. Vargas Caballero <[email protected]>
date: Wed May 20 14:16:40 EDT 2015

Rewrite lookup() in cc1

This new version removes some ugly cases in decl.c, where we had to
look up a symbol and later on insert it. In this new version lookup()
takes the name from yytext and, when no matching symbol is found,
inserts a new undefined symbol itself.

--- a/cc1/cc1.h
+++ b/cc1/cc1.h
@@ -91,6 +91,7 @@
 	NS_TAG,
 	NS_LABEL,
 	NS_CPP,
+	NS_KEYWORD,
 	NS_STRUCTS
 };
 
@@ -251,8 +252,9 @@
 extern Type *mktype(Type *tp, uint8_t op, short nelem, void *data);
 
 /* symbol.c */
-extern Symbol *lookup(char *s, unsigned char ns);
-extern Symbol *install(char *s, unsigned char ns);
+extern Symbol *lookup(uint8_t ns);
+extern Symbol *install(uint8_t ns);
+extern Symbol *newsym(uint8_t ns);
 extern void pushctx(void), popctx(void);
 
 /* stmt.c */
@@ -273,6 +275,7 @@
 extern void setfline(unsigned short line);
 extern bool addinput(char *fname);
 extern void delinput(void);
+extern void setnamespace(uint8_t ns);
 #define accept(t) ((yytoken == (t)) ? next() : 0)
 
 /* code.c */
--- a/cc1/cpp.c
+++ b/cc1/cpp.c
@@ -132,7 +132,8 @@
 static char *
 define(char *s)
 {
-	char *t, name[IDENTSIZ+1];
+	extern char yytext[];
+	char *t;
 	size_t len;
 	Symbol *sym;
 
@@ -142,9 +143,15 @@
 		/* nothing */;
 	if ((len = t - s) > IDENTSIZ)
 		goto too_long;
-	strncpy(name, s, len);
-	name[len] = '\0';
-	sym = install(name, NS_CPP);
+	strncpy(yytext, s, len);
+	yytext[len] = '\0';
+	sym = lookup(NS_CPP);
+	if ((sym->flags & ISDEFINED) && sym->ns == NS_CPP) {
+		warn("'%s' redefined", yytext);
+		free(sym->u.s);
+	}
+	sym->flags |= ISDEFINED;
+	sym->ns = NS_CPP;
 
 	for (s = t; isspace(*s); ++s)
 		/* nothing */;
--- a/cc1/decl.c
+++ b/cc1/decl.c
@@ -88,19 +88,6 @@
 	return queue(dp, FTN, n, tp);
 }
 
-static Symbol *
-newiden(uint8_t ns)
-{
-	Symbol *sym;
-	extern uint8_t curctx;
-
-	if (yylval.sym && yylval.sym->ctx == curctx && yylval.sym->ns == ns)
-		error("redeclaration of '%s'", yytext);
-	sym = install(yytext, ns);
-	next();
-	return sym;
-}
-
 static struct dcldata *declarator0(struct dcldata *dp, uint8_t ns);
 
 static struct dcldata *
@@ -112,10 +99,13 @@
 		dp = declarator0(dp, ns);
 		expect(')');
 	} else {
-		if (yytoken == IDEN || yytoken == TYPEIDEN)
-			sym = newiden(ns);
-		else
-			sym = install(NULL, ns);
+		if (yytoken == IDEN || yytoken == TYPEIDEN) {
+			if ((sym = install(ns)) == NULL)
+				error("redeclaration of '%s'", yytext);
+			next();
+		} else {
+			sym = newsym(ns);
+		}
 		dp = queue(dp, IDEN, 0, sym);
 	}
 
@@ -269,19 +259,22 @@
 }
 
 static Symbol *
-newtag(uint8_t tag)
+newtag(void)
 {
 	Symbol *sym;
+	uint8_t tag = yylval.token;
 	static uint8_t ns = NS_STRUCTS;
 
+	setnamespace(NS_TAG);
+	next();
 	switch (yytoken) {
-	case IDEN: case TYPEIDEN:
-		if ((sym = lookup(yytext, NS_TAG)) == NULL)
-			sym = install(yytext, NS_TAG);
+	case IDEN:
+	case TYPEIDEN:
+		sym = yylval.sym;
 		next();
 		break;
 	default:
-		sym = install(NULL, NS_TAG);
+		sym = newsym(NS_TAG);
 		break;
 	}
 	if (!sym->type) {
@@ -290,7 +283,8 @@
 		sym->type = mktype(NULL, tag, 0, NULL);
 		sym->type->ns = ns++;
 	}
-	
+
+	sym->flags |= ISDEFINED;
 	if (sym->type->op != tag)
 		error("'%s' defined as wrong kind of tag", yytext);
 	return sym;
@@ -303,12 +297,10 @@
 {
 	Type *tagtype, *buff[NR_MAXSTRUCTS], **bp = &buff[0];
 	Symbol *tagsym, *sym;
-	uint8_t tag, n;
+	uint8_t n;
 	size_t siz;
 
-	tag = yylval.token;
-	next();
-	tagsym = newtag(tag);
+	tagsym = newtag();
 	tagtype = tagsym->type;
 	if (!accept('{'))
 		return tagtype;
@@ -368,8 +360,7 @@
 	Symbol *sym;
 	int val = 0;
 
-	next();
-	tp = newtag(ENUM)->type;
+	tp = newtag()->type;
 
 	if (yytoken == ';')
 		return tp;
@@ -381,7 +372,9 @@
 	while (yytoken != '}') {
 		if (yytoken != IDEN)
 			unexpected();
-		sym = newiden(NS_IDEN);
+		if ((sym = install(NS_IDEN)) == NULL)
+			error("duplicated member '%s'", yytext);
+		next();
 		sym->type = inttype;
 		if (accept('='))
 			initializer(sym);
--- a/cc1/expr.c
+++ b/cc1/expr.c
@@ -333,18 +333,16 @@
 static Node *
 field(Node *np)
 {
-	extern uint8_t lex_ns;
 	Symbol *sym;
 
 	switch (BTYPE(np)) {
 	case STRUCT: case UNION:
-		lex_ns = np->type->ns;
+		setnamespace(np->type->ns);
 		next();
 		if (yytoken != IDEN)
 			unexpected();
 		if ((sym = yylval.sym) == NULL)
 			error("incorrect field in struct/union");
-		lex_ns = NS_IDEN;
 		next();
 		return node(OFIELD, sym->type, varnode(sym), np);
 	default:
@@ -470,9 +468,9 @@
 		next();
 		break;
 	case IDEN:
-		if (yylval.sym == NULL) {
-			yylval.sym = install(yytext, NS_IDEN);
+		if (!(yylval.sym->flags & ISDEFINED)) {
 			yylval.sym->type = inttype;
+			yylval.sym->flags |= ISDEFINED;
 			error("'%s' undeclared", yytext);
 		}
 		np = varnode(yylval.sym);
--- a/cc1/lex.c
+++ b/cc1/lex.c
@@ -23,7 +23,7 @@
 	struct input *next;
 };
 
-uint8_t lex_ns = NS_IDEN;
+static uint8_t lex_ns = NS_IDEN;
 
 uint8_t yytoken;
 struct yystype yylval;
@@ -246,7 +246,7 @@
 
 convert:
 	tp = ctype(INT, sign, size);
-	sym = install(NULL, NS_IDEN);
+	sym = newsym(NS_IDEN);
 	sym->type = tp;
 	v = strtol(s, NULL, base);
 	if (tp == inttype)
@@ -345,7 +345,7 @@
 		error("invalid character constant");
 	++input->p;
 
-	sym = install(NULL, NS_IDEN);
+	sym = newsym(NS_IDEN);
 	sym->u.i = c;
 	sym->type = inttype;
 	yylval.sym = sym;
@@ -386,7 +386,7 @@
 	}
 
 	*bp = '\0';
-	sym = install(NULL, NS_IDEN);
+	sym = newsym(NS_IDEN);
 	sym->u.s = xstrdup(buf);
 	sym->type = mktype(chartype, ARY, (bp - buf) + 1, NULL);
 	yylval.sym = sym;
@@ -403,8 +403,8 @@
 		/* nothing */;
 	input->p = p;
 	tok2str();
-	sym = yylval.sym = lookup(yytext, lex_ns);
-	if (!sym || sym->token == IDEN)
+	sym = yylval.sym = lookup(lex_ns);
+	if (sym->token == IDEN)
 		return IDEN;
 	yylval.token = sym->u.token;
 	return sym->token;
@@ -502,6 +502,13 @@
 	return t;
 }
 
+/* TODO: Ensure that lex_ns is NS_IDEN after a recovery */
+void
+setnamespace(uint8_t ns)
+{
+	lex_ns = ns;
+}
+
 uint8_t
 next(void)
 {
@@ -525,6 +532,7 @@
 	} else {
 		yytoken = operator();
 	}
+	lex_ns = NS_IDEN;
 	return yytoken;
 }
 
--- a/cc1/stmt.c
+++ b/cc1/stmt.c
@@ -13,28 +13,6 @@
 extern Node *iszero(Node *np), *eval(Node *np);
 static void stmt(Symbol *lbreak, Symbol *lcont, Caselist *lswitch);
 
-static Symbol *
-label(char *s, char define)
-{
-	Symbol *sym;
-
-	if ((sym = lookup(s, NS_LABEL)) != NULL) {
-		if (define) {
-			if (sym->flags & ISDEFINED)
-				error("label '%s' already defined", s);
-			sym->flags |= ISDEFINED;
-		}
-		return sym;
-	}
-
-	sym = install(s, NS_LABEL);
-	if (define)
-		sym->flags |= ISDEFINED;
-	else
-		sym->flags &= ~ISDEFINED;
-	return sym;
-}
-
 static void
 stmtexp(Symbol *lbreak, Symbol *lcont, Caselist *lswitch)
 {
@@ -72,9 +50,9 @@
 	Symbol *begin, *cond, *end;
 	Node *np;
 
-	begin = install(NULL, NS_LABEL);
-	end = install(NULL, NS_LABEL);
-	cond = install(NULL, NS_LABEL);
+	begin = newsym(NS_LABEL);
+	end = newsym(NS_LABEL);
+	cond = newsym(NS_LABEL);
 
 	expect(WHILE);
 	np = condition();
@@ -95,9 +73,9 @@
 	Symbol *begin, *cond, *end;
 	Node *econd, *einc, *einit;
 
-	begin = install(NULL, NS_LABEL);
-	end = install(NULL, NS_LABEL);
-	cond = install(NULL, NS_LABEL);
+	begin = newsym(NS_LABEL);
+	end = newsym(NS_LABEL);
+	cond = newsym(NS_LABEL);
 
 	expect(FOR);
 	expect('(');
@@ -127,8 +105,8 @@
 	Symbol *begin, *end;
 	Node *np;
 
-	begin = install(NULL, NS_LABEL);
-	end = install(NULL, NS_LABEL);
+	begin = newsym(NS_LABEL);
+	end = newsym(NS_LABEL);
 	expect(DO);
 	emit(OBLOOP, NULL);
 	emit(OLABEL, begin);
@@ -179,9 +157,24 @@
 static void
 Label(Symbol *lbreak, Symbol *lcont, Caselist *lswitch)
 {
+	Symbol *sym;
+
 	switch (yytoken) {
-	case IDEN: case TYPEIDEN:
-		emit(OLABEL, label(yytext, 1));
+	case IDEN:
+	case TYPEIDEN:
+		/*
+		 * We cannot call install() because the call to lookup() in
+		 * lex.c was done in the NS_IDEN namespace, and it is impossible
+		 * to fix this point, because an identifier at the beginning
+		 * of a statement may be part of an expression or part of a
+		 * label. This double call to lookup() is going to generate
+		 * an undefined symbol that is not going to be used ever.
+		 */
+		sym = lookup(NS_LABEL);
+		if (sym->flags & ISDEFINED)
+			error("label '%s' already defined", yytext);
+		sym->flags |= ISDEFINED;
+		emit(OLABEL, sym);
 		next();
 		expect(':');
 		stmt(lbreak, lcont, lswitch);
@@ -204,11 +197,11 @@
 static void
 Goto(Symbol *lbreak, Symbol *lcont, Caselist *lswitch)
 {
-	expect(GOTO);
-
+	setnamespace(NS_LABEL);
+	next();
 	if (yytoken != IDEN)
 		unexpected();
-	emit(OJUMP, label(yytext, 0));
+	emit(OJUMP, yylval.sym);
 	next();
 	expect(';');
 }
@@ -229,8 +222,8 @@
 		error("incorrect type in switch statement");
 	expect (')');
 
-	lbreak = install(NULL, NS_LABEL);
-	lcond = install(NULL, NS_LABEL);
+	lbreak = newsym(NS_LABEL);
+	lcond = newsym(NS_LABEL);
 	emit(OJUMP, lcond);
 	stmt(lbreak, lcont, &lcase);
 	emit(OLABEL, lcond);
@@ -263,7 +256,7 @@
 	pcase = xmalloc(sizeof(*pcase));
 	pcase->expr = np;
 	pcase->next = lswitch->head;
-	emit(OLABEL, pcase->label = install(NULL, NS_LABEL));
+	emit(OLABEL, pcase->label = newsym(NS_LABEL));
 	lswitch->head = pcase;
 	++lswitch->nr;
 }
@@ -271,7 +264,7 @@
 static void
 Default(Symbol *lbreak, Symbol *lcont, Caselist *lswitch)
 {
-	Symbol *ldefault = install(NULL, NS_LABEL);
+	Symbol *ldefault = newsym(NS_LABEL);
 
 	expect(DEFAULT);
 	expect(':');
@@ -285,7 +278,7 @@
 	Symbol *end, *lelse;
 	Node *np;
 
-	lelse = install(NULL, NS_LABEL);
+	lelse = newsym(NS_LABEL);
 	expect(IF);
 	np = condition();
 	emit(OBRANCH, lelse);
@@ -292,7 +285,7 @@
 	emit(OEXPR, negate(np));
 	stmt(lbreak, lcont, lswitch);
 	if (accept(ELSE)) {
-		end = install(NULL, NS_LABEL);
+		end = newsym(NS_LABEL);
 		emit(OJUMP, end);
 		emit(OLABEL, lelse);
 		stmt(lbreak, lcont, lswitch);
@@ -355,7 +348,8 @@
 	case CASE:     fun = Case;     break;
 	case DEFAULT:  fun = Default;  break;
 	default:       fun = stmtexp;  break;
-	case TYPEIDEN: case IDEN:
+	case TYPEIDEN:
+	case IDEN:
 		fun = (ahead() == ':') ? Label : stmtexp;
 		break;
 	case '@':
--- a/cc1/symbol.c
+++ b/cc1/symbol.c
@@ -69,7 +69,7 @@
 	sym->id = (curctx) ? ++localcnt : ++globalcnt;
 	sym->ctx = curctx;
 	sym->token = IDEN;
-	sym->flags = 0;
+	sym->flags = ISDEFINED;
 	sym->name = NULL;
 	sym->type = NULL;
 	sym->hash = NULL;
@@ -79,32 +79,58 @@
 }
 
 Symbol *
-lookup(char *s, uint8_t ns)
+lookup(uint8_t ns)
 {
-	Symbol *sym;
+	Symbol *sym, **h;
+	uint8_t sns;
+	char *t, c;
 
-	for (sym = htab[hash(s)]; sym; sym = sym->hash) {
-		if (!strcmp(sym->name, s) && sym->ns == ns)
+	h = &htab[hash(yytext)];
+	c = *yytext;
+	for (sym = *h; sym; sym = sym->hash) {
+		t = sym->name;
+		if (*t != c || strcmp(t, yytext))
+			continue;
+		sns = sym->ns;
+		if (sns == NS_KEYWORD || sns == NS_CPP)
 			return sym;
+		if (sns != ns)
+			continue;
+		return sym;
 	}
 
-	return NULL;
+	sym = newsym(ns);
+	sym->name = xstrdup(yytext);
+	sym->flags &= ~ISDEFINED;
+	sym->hash = *h;
+	*h = sym;
+	return sym;
 }
 
 Symbol *
-install(char *s, uint8_t ns)
+install(uint8_t ns)
 {
-	Symbol *sym, **t;
+	Symbol *sym, **h;
+	/*
+	 * install() is always called after a call to lookup(), so
+	 * yylval.sym always points to a symbol with yytext name.
+	 * if the symbol is an undefined symbol and in the same
+	 * context, then it was generated in the previous lookup()
+	 * call. If the symbol is defined and in the same context
+	 * then there is a redefinition
+	 */
+	if (yylval.sym->ctx == curctx) {
+		if (yylval.sym->flags & ISDEFINED)
+			return NULL;
+		yylval.sym->flags |= ISDEFINED;
+		return yylval.sym;
+	}
 
+	h = &htab[hash(yytext)];
 	sym = newsym(ns);
-	sym->flags |= ISDEFINED;
-
-	if (s) {
-		sym->name = xstrdup(s);
-		t = &htab[hash(s)];
-		sym->hash = *t;
-		*t = sym;
-	}
+	sym->name = xstrdup(yytext);
+	sym->hash = *h;
+	*h = sym;
 	return sym;
 }
 
@@ -154,7 +180,8 @@
 	Symbol *sym;
 
 	for (bp = buff; bp->str; ++bp) {
-		sym = install(bp->str, NS_IDEN);
+		strcpy(yytext, bp->str);
+		sym = lookup(NS_KEYWORD);
 		sym->token = bp->token;
 		sym->u.token = bp->value;
 	}