shithub: scc

Download patch

ref: 0ece8c92df0019d3eb4d1d427f6001d18104e357
parent: 8460bb981bb821656f7bd54a7d5f647a7a9ff400
author: Roberto E. Vargas Caballero <[email protected]>
date: Wed Feb 8 04:54:48 EST 2012

First version

This version has only partial support for declarations; it is a
work-in-progress version.

--- /dev/null
+++ b/.gitignore
@@ -1,0 +1,3 @@
+*.o
+makefile
+kcc
--- /dev/null
+++ b/Makefile
@@ -1,0 +1,32 @@
+# Builds kcc from the objects listed in OBJS.
+OBJS = types.o decl.o lex.o error.o symbol.o
+LIBS =
+
+all: kcc
+
+kcc: $(OBJS)
+	$(CC) $(LDFLAGS) $(CFLAGS) $(OBJS) $(LIBS) -o $@
+
+%.d: %.c
+	$(CC) -M $(CPPFLAGS) $< | \
+	sed -e 's,/usr/[^ ]*,,g' | \
+	egrep -v '^ *\\$$' > $@
+
+.PHONY: all clean distclean dep
+# dep: write Makefile plus the generated .d files into "makefile".
+dep: $(OBJS:.o=.d)
+	cat Makefile $? > makefile
+	rm -f *.d
+
+clean:
+	rm -f $(OBJS)
+	rm -f kcc
+
+distclean: clean
+	rm -f *~
+	rm -f *.d
+	rm -f makefile
+
+
+
+###Dependencies
--- /dev/null
+++ b/cc.h
@@ -1,0 +1,18 @@
+#ifndef CC_H
+#define CC_H
+/* Source position and file name used in diagnostics (defined in lex.c). */
+extern unsigned linenum;
+extern unsigned columnum;
+extern const char *filename;
+/* NOTE(review): this defines user_opt in every file that includes cc.h. */
+struct {
+	unsigned implicit_int : 1;
+	unsigned c99 : 1;
+} user_opt;
+
+/* printf-style diagnostics implemented in error.c. */
+extern void warning(const char *fmt, ...);
+extern void error(const char *fmt, ...);
+extern void die(const char *fmt, ...);
+extern void warning_error(char flag, const char *fmt, ...);
+#endif
--- /dev/null
+++ b/decl.c
@@ -1,0 +1,332 @@
+#include <assert.h>
+#include <stddef.h>
+
+#include "cc.h"
+#include "tokens.h"
+#include "types.h"
+
+/* ANSI C says minimum maximum for indirection level is 12 */
+#define PTRLEVEL_MAX 12
+/* Read by gettok() in lex.c: when non-zero, end of input is reported as an error. */
+char parser_out_home;
+/* ptype: debug-only dump of a type chain, outermost operator first. */
+#ifndef NDEBUG
+#include <stdio.h>
+
+static void ptype(register struct type *t)
+{
+	const char *what;
+
+	assert(t);
+
+	/* Walk from the outermost operator down through each base type. */
+	while (t != NULL) {
+		if (t->op == ARY)
+			what = "array of ";
+		else if (t->op == PTR)
+			what = "pointer to ";
+		else if (t->op == FTN)
+			what = "function that returns ";
+		else
+			what = "primitive data ";
+		fputs(what, stdout);
+		t = t->base;
+	}
+	putchar('\n');
+}
+#else
+/* With NDEBUG defined the call expands to nothing. */
+#  define ptype(t)
+#endif
+/* Stack of pending type operators (ARY, PTR, FTN) filled while parsing a declarator. */
+/* NOTE(review): push() and pop() perform no bounds checking. */
+static unsigned char stack[30];
+static unsigned char *stackp = stack;
+
+#define push(x) (*stackp++ = (x))
+#define pop()   (*--stackp)
+#define empty() (stackp == stack)
+
+
+void decl(void);
+/* dirdcl: parse a direct declarator: an identifier or a parenthesized */
+/* declarator, followed by any number of "()" or "[]" suffixes. */
+void dirdcl(void)
+{
+	unsigned char op, closing;
+
+	puts("dirdecl");
+	if (yytoken == IDENTIFIER) {
+		gettok();
+		/* here we are!!! */;
+	} else if (yytoken == '(') {
+		gettok();
+		decl();
+		if (yytoken != ')')
+			error("expected ')'");
+		gettok();
+	}
+
+	/* Each suffix pushes its operator for declaration() to apply later. */
+	while (yytoken == '(' || yytoken == '[') {
+		if (yytoken == '(') {
+			op = FTN;
+			closing = ')';
+		} else {
+			op = ARY;
+			closing = ']';
+		}
+		push(op);
+
+		/* Only empty suffixes are consumed so far. */
+		/* TODO: prototyped function */
+		/* TODO: specify size of array */
+		if (gettok() == closing)
+			gettok();
+	}
+
+	printf("leaving dirdcl %c\n", yytoken);
+}
+
+
+
+
+
+/*
+ * Basic types by F_* row: column 0 is the signed/plain type, column 1 the unsigned one.
+ */
+
+struct type *types[][2] = {{T_VOID, NULL},
+			   {T_SCHAR, T_UCHAR},
+			   {T_SHORT, T_USHORT},
+			   {T_INT, T_UINT},
+			   {T_LONG, T_ULONG},
+			   {T_LLONG, T_ULLONG},
+			   {T_FLOAT, NULL},
+			   {T_DOUBLE, NULL},
+			   {T_LDOUBLE, NULL}};
+/* Row indexes into types[]; specifier() keeps the current one in nt. */
+#define F_VOID    0
+#define F_CHAR    1
+#define F_SHORT   2
+#define F_INT     3
+#define F_LONG    4
+#define F_LLONG   5
+#define F_FLOAT   6
+#define F_DOUBLE  7
+#define F_LDOUBLE 8
+/* specifier: parse declaration specifiers; returns the basic type, or NULL if none was seen. */
+struct type *specifier(void)
+{
+	static char sign, sclass, tqlf, nt;	/* nt: F_* row of the type held in t */
+	struct type *t = NULL;
+	tqlf = sign = sclass = 0;
+	for (;;) {
+		switch (gettok()) {
+		case TYPEDEF:case EXTERN:case STATIC:case AUTO:case REGISTER:
+			if (sclass != 0)
+				error("Two or more storage specifier");
+			sclass = yytoken;
+			continue;
+		case CONST: case VOLATILE: case RESTRICTED:
+			/* TODO */
+			continue;
+		case VOID:   nt = F_VOID;   goto check_type;
+		case CHAR:   nt = F_CHAR;   goto check_type;
+		case SHORT:  nt = F_SHORT;  goto check_type;
+		case INT:    nt = F_INT;    goto check_type;
+		case FLOAT:  nt = F_FLOAT;  goto check_type;
+		case DOUBLE: nt = F_DOUBLE; goto check_type;
+		case LONG:   nt = F_LONG;   goto check_type;
+		case SIGNED: case UNSIGNED:
+			if (sign != 0) {
+				error((sign != yytoken) ?
+				      "signed and unsigned in declaration" :
+				      "duplicated %s", yytext);
+			}
+			sign = yytoken;
+			if (t == NULL)
+				continue;     /* we don't have type now */
+			goto check_sign;  /* only re-select the variant of the type already seen */
+		case STRUCT:	/* TODO */
+		case UNION:	/* TODO */
+		case ENUM:	/* TODO */
+		case IDENTIFIER:
+			/* TODO */
+		default:
+			return t;
+		}
+	check_type:
+		if (nt == F_LONG) {
+			if (t == NULL || t == T_INT || t == T_UINT) {
+				/* nothing */;
+			} else if (t == T_LONG || t == T_ULONG) {
+				nt = F_LLONG;	/* second "long" */
+			} else if (t == T_DOUBLE) {
+				nt = F_LDOUBLE;
+			} else if (t == T_LLONG || t == T_ULLONG) {
+				error("'long long long' is too long");
+			} else if (t == T_LDOUBLE) {
+				error("'long long double' is too long");
+			} else {
+				goto two_or_more_btype;
+			}
+		} else if (t != NULL) {
+			goto two_or_more_btype;
+		}
+	check_sign:
+		if (nt == F_VOID && sign != 0) {
+			goto incorrect_sign;
+		} if (nt == F_CHAR && sign == 0) {
+			t = T_UCHAR;        /* char by default is unsigned */
+		} else if (!(t = types[nt][sign == UNSIGNED])) {
+			goto incorrect_sign;
+		}
+	}
+two_or_more_btype:
+	error("two or more basic types");	/* error() exits, so no fall through */
+incorrect_sign:
+	error("sign specifier applied to incorrect type");
+}
+
+/* The F_* row indexes are only meant for the code above. */
+#undef F_VOID
+#undef F_CHAR
+#undef F_SHORT
+#undef F_INT
+#undef F_LONG
+#undef F_LLONG
+#undef F_FLOAT
+#undef F_DOUBLE
+#undef F_LDOUBLE
+/* decl: parse a declarator: leading '*'s with their qualifiers, then a direct declarator. */
+/* One PTR is pushed per '*' after dirdcl(), so the suffix operators are pushed first. */
+void decl(void)
+{
+	unsigned char ns, bit;
+	unsigned char qlf[PTRLEVEL_MAX];
+
+	puts("decl");
+	for (ns = 0; yytoken == '*'; ns++) {
+		if (ns == PTRLEVEL_MAX)
+			error("Too much indirection levels");
+		qlf[ns] = 0;
+		/* Consume every qualifier that follows this '*'. */
+		for (;;) {
+			switch (gettok()) {
+			case CONST:      bit = 2; break;
+			case RESTRICTED: bit = 4; break;
+			case VOLATILE:   bit = 8; break;
+			default:         bit = 0; break;
+			}
+			if (bit == 0)
+				break;
+			if (qlf[ns] & bit)
+				goto duplicated;
+			qlf[ns] |= bit;
+		}
+	}
+	dirdcl();
+	while (ns-- > 0)
+		push(PTR);	/* TODO: pointer qualifiers */
+	printf("leaving dcl %c\n", yytoken);
+	return;
+
+duplicated:
+	error("duplicated '%s'", yytext);
+}
+/* declaration: parse "specifiers declarator {, declarator} ;" and print each declared type. */
+/* Every declarator starts again from the specifier type, not from the previous result. */
+void declaration(void)
+{
+	struct type *base, *t;
+
+	base = specifier();
+	if (base == NULL)	/* mktype() and ptype() need a real type */
+		error("type specifier expected");
+
+	for (; ; gettok()) {
+		decl();
+		if (yytoken != ',' && yytoken != ';')
+			error("unexpected", yytext);	/* NOTE(review): yytext has no conversion */
+		for (t = base; !empty(); )
+			t = mktype(t, pop());
+		ptype(t);
+
+		if (yytoken == ';') {
+			/* end of sentence */;
+			return;
+		}
+		/* ',': add variable, then parse the next declarator */
+	}
+}
+
+/* Disabled draft of a specifier parser; the #if 0 below keeps it out of the build. */
+#if 0
+void specdcl(void)
+{
+	struct spec_type t = {0, 0, 0};
+
+repeat:
+	parser_out_home = 1;
+	switch (gettok()) {
+	case TYPEDEF:
+	case EXTERN:
+	case STATIC:
+	case AUTO:
+	case REGISTER:
+	case CONST:
+	case VOLATILE:
+	case SIGNED:
+	case UNSIGNED:
+		if (!(t.mods ^= MODIFIER(tok)))
+			error("duplicate '%s'", yytext);
+		goto repeat;
+	case IDENTIFIER:
+		/* This is incorrect!!! */
+		t.type = TYPE(INT);
+		if (user_opt.implicit_int) {
+			warning_error(user_opt.c99,
+				      "type defaults to ‘int’ in declaration"
+				      " of", yytext);
+		}
+		if (gettok() != ';')
+			goto non_end_after_id;
+		return ';';
+	case VOID:
+	case CHAR:
+	case INT:
+	case LONG:
+	case FLOAT:
+	case DOUBLE:
+		if (!(t.type ^= (1 << TYPE(tok))))
+			error("duplicate '%s'", yytext);
+
+	case STRUCT:
+	case UNION:
+	case ENUM:
+	case TYPE_NAME:
+		;
+	}
+
+
+non_end_after_id:
+	error("';' expected");
+}
+#endif
+
+
+
+
+#include <stddef.h>
+/* Entry point: read from stdin and parse one declaration. */
+extern void open_file(const char *file);	/* defined in lex.c; no header here declares it */
+int main(int argc, char *argv[])
+{
+	init_lex();
+
+	open_file(NULL);
+	declaration();
+
+	return 0;
+}
--- /dev/null
+++ b/error.c
@@ -1,0 +1,57 @@
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "cc.h"
+
+
+/* Common reporter: prints "<kind>:<file>:<line>:<column>: <message>" and a newline on stderr; */
+/* a non-zero flag selects "error" and ends the process with EXIT_FAILURE. */
+static void warning_error_helper(char flag, const char *fmt, va_list va)
+{
+	const char *kind = flag ? "error" : "warning";
+	fprintf(stderr, "%s:%s:%u:%u: ", kind, filename, linenum, columnum);
+	vfprintf(stderr, fmt, va);
+	putc('\n', stderr);
+	if (flag != 0)
+		exit(EXIT_FAILURE); /* TODO: uhmmmm */
+}
+
+/* warning_error: report as a warning when flag is zero, as a fatal error otherwise. */
+void warning_error(char flag, const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	warning_error_helper(flag, fmt, args);
+	va_end(args);
+}
+
+/* error: report a formatted error; the helper exits, so this does not return. */
+void error(const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	warning_error_helper(1, fmt, args);
+	va_end(args);
+}
+
+/* warning: report a formatted warning and return to the caller. */
+void warning(const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	warning_error_helper(0, fmt, args);
+	va_end(args);
+}
+/* die: print a printf-style message and a newline on stderr, then exit with EXIT_FAILURE. */
+/* The va_list has to go through vfprintf; fprintf would treat it as a plain argument. */
+void die(const char *fmt, ...)
+{
+	va_list va;
+	va_start(va, fmt);
+	vfprintf(stderr, fmt, va);
+	putc('\n', stderr);
+	va_end(va);
+	exit(EXIT_FAILURE);
+}
--- /dev/null
+++ b/lex.c
@@ -1,0 +1,185 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "symbol.h"
+#include "tokens.h"
+
+
+#define TOKSIZ_MAX 21
+#define NR_KWD_HASH 32
+/* TODO: move hashfun here */
+/* Keyword spellings and their tokens; init_lex() links them through `next`. */
+static struct keyword {
+	char *str;
+	unsigned char tok;
+	struct keyword *next;
+} keywords [] = {"auto", AUTO, NULL,
+		 "break", BREAK, NULL,
+		 "_Bool", CHAR, NULL,
+		 "case", CASE, NULL,
+		 "char", CHAR, NULL,
+		 "const", CONST, NULL,
+		 "continue", CONTINUE, NULL,
+		 "default", DEFAULT, NULL,
+		 "do", DO, NULL,
+		 "double", DOUBLE, NULL,
+		 "else", ELSE, NULL,
+		 "enum", ENUM, NULL,
+		 "extern", EXTERN, NULL,
+		 "float", FLOAT, NULL,
+		 "for", FOR, NULL,
+		 "goto", GOTO, NULL,
+		 "if", IF, NULL,
+		 "int", INT, NULL,
+		 "long", LONG, NULL,
+		 "register", REGISTER, NULL,
+		 "restrict", RESTRICTED, NULL,
+		 "return", RETURN, NULL,
+		 "short", SHORT, NULL,
+		 "signed", SIGNED, NULL,
+		 "sizeof", SIZEOF, NULL,
+		 "static", STATIC, NULL,
+		 "struct", STRUCT, NULL,
+		 "switch", SWITCH, NULL,
+		 "typedef", TYPEDEF, NULL,
+		 "union", UNION, NULL,
+		 "unsigned", UNSIGNED, NULL,
+		 "void", VOID, NULL,
+		 "volatile", VOLATILE, NULL,
+		 "while", WHILE, NULL,
+		 NULL, 0, NULL
+};
+/* Keyword buckets filled by init_lex(), and the current input stream. */
+static struct keyword *khash[NR_KWD_HASH];
+static FILE *yyin;
+/* Current token state published to the parser. */
+unsigned char yytoken;
+unsigned char yyhash;
+size_t yylen;
+char yytext[TOKSIZ_MAX + 1];
+unsigned linenum;
+unsigned columnum;
+const char *filename;
+
+/* NOTE(review): yyval is not assigned anywhere in this file. */
+union yyval {
+	struct symbol *sym;
+} yyval;
+
+/* init_lex: link every keywords[] entry into its khash[] bucket. */
+/* Buckets are kept in descending strcmp() order, as the original loop intended. */
+void init_lex(void)
+{
+	register struct keyword *bp;
+	register struct keyword **linkp;
+	static unsigned char h;
+
+	for (bp = keywords; bp->str; bp++) {
+		h = hashfun(bp->str);
+
+		/*
+		 * Walk the link pointers rather than the nodes so that
+		 * inserting before the head needs no special case and the
+		 * chain can never be made to point back at itself.
+		 */
+		linkp = &khash[h];
+		while (*linkp && strcmp(bp->str, (*linkp)->str) < 0)
+			linkp = &(*linkp)->next;
+		bp->next = *linkp;
+		*linkp = bp;
+	}
+}
+/* iden: read an identifier into yytext; returns its keyword token or IDENTIFIER. */
+static unsigned char iden(void)
+{
+	register struct keyword *kwp;
+	register int ch;	/* int, so EOF from getc() is not truncated */
+	register char *bp = yytext;
+
+	for (yyhash = 0; bp < yytext + TOKSIZ_MAX; *bp++ = ch) {
+		if (!isalnum(ch = getc(yyin)) && ch != '_')
+			break;
+		yyhash += ch;
+	}
+	if (bp == yytext + TOKSIZ_MAX)
+		error("identifier too long %s", yytext);
+	ungetc(ch, yyin);
+	*bp = '\0';
+	yylen = bp - yytext;
+	yyhash &= NR_KWD_HASH - 1;
+	for (kwp = khash[yyhash]; kwp; kwp = kwp->next) {
+		if (!strcmp(kwp->str, yytext))
+			return kwp->tok;
+	}
+	return IDENTIFIER;
+}
+
+/* gettok: skip white space and return the next token, which is also stored in yytoken. */
+/* End of input yields EOFTOK, or an error when parser_out_home is set. */
+unsigned char gettok(void)
+{
+	static unsigned int c;
+	register unsigned char ch;
+	extern char parser_out_home;
+
+	while (isspace(c = getc(yyin)))
+		/* nothing */;
+	if (c == EOF) {
+		if (parser_out_home)
+			error("Find EOF while parsing");
+		else
+			return yytoken = EOFTOK;	/* keep yytoken in step with the result */
+	}
+	ch = c;
+	if (isalpha(ch) || ch == '_') {
+		ungetc(ch, yyin);
+		ch = iden();
+	} else if (isdigit(ch)) {
+		;
+	} else {
+		switch (ch) {
+		case '&': case '|':
+			if ((c = getc(yyin)) == ch) {
+				ch |= 0x80; /* TODO */
+				break;
+			} else {
+				ungetc(c, yyin);
+			}	/* fall through: may still be "&=" or "|=" */
+		case '^': case '=': case '<': case '>':
+		case '*': case '+': case '-': case '/':
+			if ((c = getc(yyin)) == '=') {
+				ch |= 0x80; /* TODO */
+				break;
+			} else {
+				ungetc(c, yyin);
+			}	/* fall through: plain single-character token */
+		case ';': case '{': case '}': case '(': case ')': case '~':
+		case '!': case ',': case '?': case '[': case ']': case ':':
+			break;
+		default:
+			error("Incorrect character '%02x", c);
+		}
+	}
+
+return_token:
+	printf("Token = %c (%u)\n", (isprint(ch)) ? ch : ' ', (unsigned) ch);
+	return yytoken = ch;
+}
+/* open_file: make `file` the lexer input, closing any stream opened before. */
+/* A NULL name selects stdin, reported as "(stdin)". */
+void open_file(const char *file)
+{
+	if (yyin != NULL)
+		fclose(yyin);
+	if (file != NULL) {
+		if ((yyin = fopen(file, "r")) == NULL)
+			die("file '%s' not found", file);
+		filename = file;
+	} else {
+		yyin = stdin;
+		filename = "(stdin)";
+	}
+}
--- /dev/null
+++ b/symbol.c
@@ -1,0 +1,105 @@
+
+
+#include <stddef.h>
+
+#include "symbol.h"
+
+#define NR_SYM_HASH 32
+/* One symbol table: hash buckets plus the most recently pushed symbol. */
+struct symhash {
+	struct symbol *buf[NR_SYM_HASH];
+	struct symbol *top;
+};
+
+/* Saved top of each table, taken when a context is entered (see new_ctx). */
+struct symctx {
+	struct symbol *siden;
+	struct symbol *sstruct;
+	struct symbol *sgoto;
+	struct symctx *next;
+};
+
+
+/* Innermost context and the three symbol tables. */
+static struct symctx global_ctx;
+static struct symctx *ctxp = &global_ctx;
+struct symhash siden, sgoto, sstruct;
+
+/* hashfun: add up the bytes of s in an unsigned char (so modulo 256), */
+/* then mask the sum down to a bucket index below NR_SYM_HASH. */
+unsigned char hashfun(register const char *s)
+{
+	register unsigned char sum = 0;
+
+	while (*s != '\0')
+		sum += (unsigned char) *s++;
+	return sum & (NR_SYM_HASH - 1);
+}
+
+
+/* new_ctx: enter a scope; ctx records the current top of every table */
+/* and becomes the innermost context. */
+void new_ctx(struct symctx *ctx)
+{
+	ctx->next = ctxp;
+	ctx->sgoto = sgoto.top;
+	ctx->sstruct = sstruct.top;
+	ctx->siden = siden.top;
+	ctxp = ctx;
+}
+
+
+/*
+ * del_hash_ctx: unlink from each bucket the leading symbols whose address is below `top`.
+ * WARNING: not portable; it expects successive alloca calls to return decreasing addresses.
+ */
+static void del_hash_ctx(struct symhash *h, struct symbol *const top)
+{
+	register struct symbol **bp;
+	static struct symbol **lim;
+
+	lim = h->buf + NR_SYM_HASH;
+	for (bp = h->buf; bp < lim; bp++) {
+		register struct symbol *aux;	/* NULL marks the end of a chain */
+		for (aux = *bp; aux && aux < top; *bp = aux = aux->next)
+			if (aux == h->top)	/* NOTE(review): the assignment below is a no-op */
+				h->top = aux;
+	}
+}
+/* del_ctx: prune each table back to the tops saved in the innermost context. */
+void del_ctx(void)
+{
+	struct symctx *ctx = ctxp;	/* NOTE(review): ctxp itself is not popped here */
+	del_hash_ctx(&siden, ctx->siden);
+	del_hash_ctx(&sstruct, ctx->sstruct);
+	del_hash_ctx(&sgoto, ctx->sgoto); /* TODO: correct handling in goto */
+}
+
+
+/* pushsym: insert sym at the head of its bucket in h and record it as h->top. */
+/* Returns sym. */
+struct symbol *pushsym(struct symhash *h, struct symbol *sym)
+{
+	struct symbol **bucket = &h->buf[hashfun(sym->str)];
+
+	sym->next = *bucket;
+	*bucket = sym;
+	h->top = sym;
+	return sym;
+}
+
+
+/* findsym: return the first symbol named s in table h, or NULL when there is none. */
+/* NOTE(review): strcmp is used but <string.h> is not among this file's includes. */
+struct symbol *findsym(struct symhash *h, char *s)
+{
+	register struct symbol *sym = h->buf[hashfun(s)];
+
+	while (sym != NULL) {
+		if (strcmp(sym->str, s) == 0)
+			break;
+		sym = sym->next;
+	}
+
+	return sym;
+}
--- /dev/null
+++ b/symbol.h
@@ -1,0 +1,22 @@
+
+#pragma once
+#ifndef SYMBOL_H
+#define SYMBOL_H
+
+/* Symbol table interface shared by the lexer and symbol.c. */
+struct type;
+/* A named entry; `next` chains entries that share a hash bucket. */
+struct symbol {
+	char *str;
+	struct type *type;
+	struct symbol *next;
+};
+
+/* struct symhash is completed in symbol.c; only the tables are visible here. */
+struct symhash;
+extern struct symhash siden, sgoto, sstruct;
+/* Hash of a string, already reduced to a bucket index. */
+extern unsigned char hashfun(register const char *s);
+
+
+#endif
--- /dev/null
+++ b/tokens.h
@@ -1,0 +1,38 @@
+#ifndef TOKENS_H
+#define TOKENS_H
+/* Token codes returned by gettok() and the lexer state shared with the parser. */
+/* Don't change this codification because program used it!!! */
+enum {
+  /* types */
+  INT = 1, CHAR, FLOAT, LONG, LLONG, SHORT, VOID, DOUBLE,
+  LDOUBLE, STRUCT, UNION, ENUM, UTYPE, BOOL,
+  /* storage specifier */
+  TYPEDEF, EXTERN, STATIC, AUTO, REGISTER,
+  /* type qualifier */
+  VOLATILE, CONST, RESTRICTED,
+  /* sign specifier */
+  UNSIGNED, SIGNED
+};
+
+
+
+/* Tokens numbered from 128 up; gettok() returns punctuation characters as themselves. */
+enum {
+  IDENTIFIER = 128, CONSTANT, STRING_LITERAL, SIZEOF,
+	PTR_OP, INC_OP, DEC_OP, LEFT_OP, RIGHT_OP, LE_OP, GE_OP, EQ_OP, NE_OP,
+	AND_OP, OR_OP, MUL_ASSIGN, DIV_ASSIGN, MOD_ASSIGN, ADD_ASSIGN,
+	SUB_ASSIGN, LEFT_ASSIGN, RIGHT_ASSIGN, AND_ASSIGN,
+	XOR_ASSIGN, OR_ASSIGN, TYPE_NAME,
+	ELLIPSIS,
+	CASE, DEFAULT, IF, ELSE, SWITCH, WHILE, DO, FOR, GOTO,
+	CONTINUE, BREAK, RETURN, EOFTOK
+};
+/* Lexer state defined in lex.c; NOTE(review): size_t needs <stddef.h>, not included here. */
+extern char yytext[];
+extern unsigned char yyhash;
+extern size_t yylen;
+extern unsigned char yytoken;
+
+extern unsigned char gettok(void);
+extern void init_lex(void);
+#endif
--- /dev/null
+++ b/types.c
@@ -1,0 +1,41 @@
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "types.h"
+
+#define xcalloc calloc
+/* Objects for the basic types; types.h exposes them through the T_* macros. */
+struct type tschar, tuchar;             /* signed char, unsigned char */
+struct type tshort, tushort;            /* short,  unsigned short */
+struct type tint, tuint;                /* int,  unsigned int */
+struct type tfloat, tdouble, tldouble;	/* float, double, long double */
+struct type tlong, tulong;		/* long, unsigned long */
+struct type tllong, tullong;		/* long long, unsigned long long */
+struct type tvoid;			/* void */
+/* mktype: return the type "op of base" (op is PTR, ARY or FTN), creating it on first use. */
+/* The result is cached in base, so repeated requests yield the same object. */
+struct type *mktype(register struct type *base, unsigned  char op)
+{
+	register struct type **ptr, *nt;
+	assert(op == PTR || op == ARY || op == FTN);
+
+	switch (op) {
+	case PTR: ptr = &base->ptr; break;
+	case ARY: ptr = &base->ary; break;
+	case FTN: ptr = &base->ftn; break;
+	default:  abort();	/* reachable only when assert is compiled out */
+	}
+	if (*ptr)
+		return *ptr;
+
+	/* calloc leaves base, ary, ptr and ftn of the new node NULL. */
+	nt = xcalloc(1, sizeof(*nt));
+	if (nt == NULL)
+		abort();	/* out of memory */
+	*ptr = nt;
+	nt->op = op;
+	nt->base = base;
+
+	return nt;
+}
--- /dev/null
+++ b/types.h
@@ -1,0 +1,46 @@
+#ifndef TYPES_H_
+#define TYPES_H_
+
+/* Type node; ary, ptr and ftn cache the types derived from this one by mktype(). */
+struct type {
+	unsigned char op;
+	struct type *base;
+	struct type *ary;               /* array */
+	struct type *ptr;		/* pointer */
+	struct type *ftn;		/* function */
+	union  {
+		size_t nelem;
+	} u;
+};
+
+/* Basic type objects (defined in types.c) and pointer shorthands for them. */
+extern struct type tschar, tuchar, tshort, tushort, tint, tuint;
+extern struct type tfloat, tdouble, tldouble, tlong;
+extern struct type tulong, tllong, tullong, tvoid;
+/* Each T_* macro is the address of the object with the matching name. */
+#define T_SCHAR   (&tschar)
+#define T_UCHAR   (&tuchar)
+#define T_SHORT   (&tshort)
+#define T_USHORT  (&tushort)
+#define T_INT     (&tint)
+#define T_UINT    (&tuint)
+#define T_FLOAT   (&tfloat)
+#define T_DOUBLE  (&tdouble)
+#define T_LDOUBLE (&tldouble)
+#define T_LONG    (&tlong)
+#define T_ULONG   (&tulong)
+#define T_LLONG   (&tllong)
+#define T_ULLONG  (&tullong)
+#define T_VOID    (&tvoid)
+
+/* Type operators stored in struct type.op and pushed on the declarator stack. */
+#define ARY             1
+#define PTR             2
+#define FTN             3
+#define T_CONST           8	/* NOTE(review): T_CONST, T_RESTRICTED, T_VOLATILE are unused in this patch */
+#define T_RESTRICTED     16
+#define T_VOLATILE       32
+/* Returns the type "op of base", allocating it on first use. */
+struct type *mktype(register struct type *base, unsigned  char op);
+
+#endif