ref: 8b6cc849d8e198cfa639b4de3ad84a22f2d36e07
parent: 1ffd5f6ee169b5d76958348b3376d82a66a75d63
author: Roberto E. Vargas Caballero <[email protected]>
date: Sun Jun 10 15:12:51 EDT 2012
Unified keywords and symbols. This helps simplify the code by using the same code for both keywords and symbols. After this patch the lexer is simpler, because it no longer has to care about keywords.
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-OBJS = types.o decl.o lex.o error.o symbol.o flow.o main.o expr.o
+OBJS = types.o decl.o lex.o error.o symbol.o flow.o main.o expr.o keyword.o
LIBS =
all: kcc
--- /dev/null
+++ b/keyword.c
@@ -1,0 +1,60 @@
+#include <stddef.h>
+
+#include "tokens.h"
+#include "types.h"
+#include "symbol.h"
+
+
+static struct keyword { /* reserved word -> token table; the NULL entry terminates it */
+ const char *str; /* spelling; only ever points at string literals */
+ unsigned char tok; /* token stored in the keyword's symbol by init_keywords() */
+} keywords [] = {"auto", AUTO,
+ "break", BREAK,
+ "_Bool", CHAR, /* _Bool shares the CHAR token */
+ "case", CASE,
+ "char", CHAR,
+ "const", CONST,
+ "continue", CONTINUE,
+ "default", DEFAULT,
+ "do", DO,
+ "double", DOUBLE,
+ "else", ELSE,
+ "enum", ENUM,
+ "extern", EXTERN,
+ "float", FLOAT,
+ "for", FOR,
+ "goto", GOTO,
+ "if", IF,
+ "int", INT,
+ "long", LONG,
+ "register", REGISTER,
+ "restrict", RESTRICTED, /* C99 spells the qualifier "restrict", not "restricted" */
+ "return", RETURN,
+ "short", SHORT,
+ "signed", SIGNED,
+ "sizeof", SIZEOF,
+ "static", STATIC,
+ "struct", STRUCT,
+ "switch", SWITCH,
+ "typedef", TYPEDEF,
+ "union", UNION,
+ "unsigned", UNSIGNED,
+ "void", VOID,
+ "volatile", VOLATILE,
+ "while", WHILE,
+ NULL, 0,
+};
+
+void init_keywords(void)
+{
+ register struct keyword *bp;
+ register struct symbol *sym;
+ extern void init_symbol(void);
+ /* Initialize the symbol table, then enter every reserved word in it as a T_KWD symbol. */
+ init_symbol(); /* main() no longer calls this itself */
+ for (bp = keywords; bp->str; bp++) { /* stop at the NULL sentinel entry */
+ sym = install(bp->str, hashfun(bp->str));
+ sym->tok = bp->tok; /* token iden() returns for this spelling */
+ sym->type = T_KWD; /* how iden() tells a keyword from an identifier */
+ }
+}
--- a/lex.c
+++ b/lex.c
@@ -5,56 +5,12 @@
#include <ctype.h>
#include "cc.h"
-#include "symbol.h"
#include "tokens.h"
+#include "symbol.h"
+#include "types.h"
-#define NR_KWD_HASH 32
-
-static struct keyword {
- char *str;
- unsigned char tok;
- struct keyword *next;
-} keywords [] = {"auto", AUTO, NULL,
- "break", BREAK, NULL,
- "_Bool", CHAR, NULL,
- "case", CASE, NULL,
- "char", CHAR, NULL,
- "const", CONST, NULL,
- "continue", CONTINUE, NULL,
- "default", DEFAULT, NULL,
- "do", DO, NULL,
- "double", DOUBLE, NULL,
- "else", ELSE, NULL,
- "enum", ENUM, NULL,
- "extern", EXTERN, NULL,
- "float", FLOAT, NULL,
- "for", FOR, NULL,
- "goto", GOTO, NULL,
- "if", IF, NULL,
- "int", INT, NULL,
- "long", LONG, NULL,
- "register", REGISTER, NULL,
- "restricted", RESTRICTED, NULL,
- "return", RETURN, NULL,
- "short", SHORT, NULL,
- "signed", SIGNED, NULL,
- "sizeof", SIZEOF, NULL,
- "static", STATIC, NULL,
- "struct", STRUCT, NULL,
- "switch", SWITCH, NULL,
- "typedef", TYPEDEF, NULL,
- "union", UNION, NULL,
- "unsigned", UNSIGNED, NULL,
- "void", VOID, NULL,
- "volatile", VOLATILE, NULL,
- "while", WHILE, NULL,
- NULL, 0, NULL
-};
-
-static struct keyword *khash[NR_KWD_HASH];
static FILE *yyin;
-
union yyval yyval;
unsigned char yytoken;
unsigned char yyhash;
@@ -64,37 +20,6 @@
const char *filename;
-static unsigned char hashfun(register const char *s)
-{
- register unsigned char h, ch;
-
- for (h = 0; ch = *s++; h += ch)
- /* nothing */;
- return h;
-}
-
-void init_lex(void)
-{
- register struct keyword *bp;
- static unsigned char h;
-
- for (bp = keywords; bp->str; bp++) {
- register struct keyword *aux, *ant;
- h = hashfun(bp->str) & (NR_KWD_HASH - 1);
- if (!(aux = khash[h]) || strcmp(bp->str, aux->str) < 0) {
- khash[h] = bp;
- bp->next = aux;
- continue;
- }
- for (ant = aux; aux; ant = aux, aux = aux->next) {
- if (strcmp(bp->str, aux->str) < 0)
- break;
- }
- ant->next = bp;
- bp->next = aux;
- }
-}
-
static char number(void)
{
register char *bp;
@@ -106,27 +31,17 @@
}
if (bp == yytext + TOKSIZ_MAX)
error("identifier too long %s", yytext);
- ungetc(ch, yyin);
*bp = '\0';
+ ungetc(ch, yyin);
+
return CONSTANT;
}
-static unsigned char keyword(const char *s, unsigned char key)
-{
- register struct keyword *kwp;
-
- key &= NR_KWD_HASH - 1;
- for (kwp = khash[key]; kwp; kwp = kwp->next) {
- if (!strcmp(kwp->str, yytext))
- return kwp->tok;
- }
- return 0;
-}
-
static unsigned char iden(void)
{
register char ch;
register char *bp = yytext;
+ register struct symbol *sym;
for (yyhash = 0; bp < yytext + TOKSIZ_MAX; *bp++ = ch) {
if (!isalnum(ch = getc(yyin)) && ch != '_')
@@ -137,10 +52,10 @@
error("identifier too long %s", yytext);
*bp = '\0';
ungetc(ch, yyin);
-
- if (ch = keyword(yytext, yyhash))
- return ch;
- return IDEN;;
+ if ((sym = lookup(yytext, yyhash)) && sym->type == T_KWD)
+ return sym->tok;
+ yyval.sym = sym;
+ return IDEN;
}
static unsigned char skip(void)
--- a/main.c
+++ b/main.c
@@ -6,7 +6,6 @@
#include "syntax.h"
extern void open_file(const char *file);
-extern void init_lex();
extern void init_symbol();
struct user_opt user_opt;
@@ -15,8 +14,7 @@
int main(int argc, char *argv[])
{
- init_lex();
- init_symbol();
+ init_keywords();
open_file(NULL);
for (next(); yytoken != EOFTOK; decl())
/* nothing */;
--- a/symbol.c
+++ b/symbol.c
@@ -65,7 +65,8 @@
head->h_next = sym;
next->h_prev = sym;
} else {
- sym->h_next = sym->h_prev = sym->str = NULL;
+ sym->h_next = sym->h_prev = NULL;
+ sym->str = NULL;
}
return sym;
}
@@ -88,4 +89,13 @@
for (bp = iden_hash.buf; bp < &iden_hash.buf[NR_SYM_HASH]; ++bp)
bp->h_next = bp->h_prev = bp;
+}
+
+unsigned char hashfun(register const char *s)
+{
+ register unsigned char h, ch;
+ /* Sum the bytes of s; storing back into h reduces the sum modulo UCHAR_MAX + 1. */
+ for (h = 0; ch = *s++; h += ch) /* assignment, not comparison: ends at the terminating NUL */
+ /* nothing */;
+ return h;
}
--- a/symbol.h
+++ b/symbol.h
@@ -3,18 +3,21 @@
#ifndef SYMBOL_H
#define SYMBOL_H
-
struct type;
struct symbol {
- char *str;
- unsigned char level;
struct type *type;
+ /* NOTE(review): keywords are install()ed by name too, and lookup() presumably */
+ /* compares str, so str must not share storage with tok - confirm in symbol.c. */
+ char *str;
+ union {
+ unsigned char level; /* used in usual symbols */
+ unsigned char tok; /* used in keywords */
+ };
struct symbol *next;
struct symbol *h_next, *h_prev;
};
-
struct symctx {
struct symbol *iden;
struct symctx *next;
@@ -23,7 +26,8 @@
extern void new_ctx(struct symctx *ctx);
extern void del_ctx(void);
-extern struct symbol *addsym(const char *s, unsigned char key);
-extern struct symbol *lookupsym(char *s, unsigned char key);
+extern struct symbol *install(const char *s, unsigned char key);
+extern struct symbol *lookup(char *s, unsigned char key);
+extern unsigned char hashfun(register const char *s);
#endif
--- a/tokens.h
+++ b/tokens.h
@@ -33,7 +33,7 @@
};
-
+struct symbol;
union yyval {
struct symbol *sym;
};
@@ -51,4 +51,5 @@
extern unsigned char next(void);
extern char accept(unsigned char tok);
extern void expect(unsigned char tok);
+extern void init_keywords(void);
#endif
--- a/types.c
+++ b/types.c
@@ -23,6 +23,7 @@
struct type tllong = {.btype = LLONG, .sign = 1};
struct type tullong = {.btype = LLONG, .sign = 0};
struct type tvoid = {.btype = VOID, .sign = 0};
+struct type tkeyword;
#define TYPEOP_MAX PTRLEVEL_MAX /* TODO: take a look of the ANSI standard */
--- a/types.h
+++ b/types.h
@@ -24,7 +24,7 @@
extern struct type tschar, tuchar, tshort, tushort, tint, tuint;
extern struct type tfloat, tdouble, tldouble, tlong;
-extern struct type tulong, tllong, tullong, tvoid;
+extern struct type tulong, tllong, tullong, tvoid, tkeyword;
#define T_SCHAR (&tschar)
#define T_UCHAR (&tuchar)
@@ -40,6 +40,7 @@
#define T_LLONG (&tllong)
#define T_ULLONG (&tullong)
#define T_VOID (&tvoid)
+#define T_KWD (&tkeyword)
#define ARY 1
#define PTR 2