ref: 0ece8c92df0019d3eb4d1d427f6001d18104e357
parent: 8460bb981bb821656f7bd54a7d5f647a7a9ff400
author: Roberto E. Vargas Caballero <[email protected]>
date: Wed Feb 8 04:54:48 EST 2012
First version. This version only has partial support for declarations, and it is an "in progress" version.
--- /dev/null
+++ b/.gitignore
@@ -1,0 +1,3 @@
+*.o
+makefile
+kcc
--- /dev/null
+++ b/Makefile
@@ -1,0 +1,32 @@
+
+OBJS = types.o decl.o lex.o error.o symbol.o
+LIBS =
+
+all: kcc
+
+kcc: $(OBJS)
+ $(CC) $(LDFLAGS) $(CFLAGS) $(LIBS) $(OBJS) -o $@
+
+%.d: %.c
+ $(CC) -M $(CPPFLAGS) $< | \
+ sed -e 's,/usr/[^ ]*,,g' | \
+ egrep -v '^ *\\$$' > $@
+
+.PHONY: clean distclean dep
+
+dep: $(OBJS:.o=.d)
+ cat Makefile $? > makefile
+ rm -f *.d
+
+clean:
+	rm -f $(OBJS)
+	rm -f kcc
+
+distclean: clean
+ rm -f *~
+ rm -f *.d
+ rm -f makefile
+
+
+
+###Dependencies
--- /dev/null
+++ b/cc.h
@@ -1,0 +1,18 @@
+#ifndef CC_H
+#define CC_H
+
+extern unsigned linenum;
+extern unsigned columnum;
+extern const char *filename;
+
+struct {
+ unsigned implicit_int : 1;
+ unsigned c99 : 1;
+} user_opt;
+
+
+extern void warning(const char *fmt, ...);
+extern void error(const char *fmt, ...);
+extern void die(const char *fmt, ...);
+extern void warning_error(char flag, const char *fmt, ...);
+#endif
--- /dev/null
+++ b/decl.c
@@ -1,0 +1,332 @@
+#include <assert.h>
+#include <stddef.h>
+
+#include "cc.h"
+#include "tokens.h"
+#include "types.h"
+
+/* ANSI C says minimum maximum for indirection level is 12 */
+#define PTRLEVEL_MAX 12
+
+char parser_out_home;
+
+#ifndef NDEBUG
+#include <stdio.h>
+
+static void ptype(register struct type *t)
+{
+ assert(t);
+
+ for (; t; t = t->base) {
+ switch (t->op) {
+ case ARY:
+ fputs("array of ", stdout);
+ break;
+ case PTR:
+ fputs("pointer to ", stdout);
+ break;
+ case FTN:
+ fputs("function that returns ", stdout);
+ break;
+ default:
+ fputs("primitive data ", stdout);
+ break;
+ }
+ }
+ putchar('\n');
+}
+#else
+# define ptype(t)
+#endif
+
+
+static unsigned char stack[30];
+static unsigned char *stackp = stack;
+
+#define push(x) (*stackp++ = (x))
+#define pop() (*--stackp)
+#define empty() (stackp == stack)
+
+
+void decl(void);
+
+
+void dirdcl(void)
+{
+ puts("dirdecl");
+ if (yytoken == '(') {
+ gettok();
+ decl();
+ if (yytoken != ')')
+ error("expected ')'");
+ gettok();
+ } else if (yytoken == IDENTIFIER) {
+ gettok();
+ /* here we are!!! */;
+ }
+
+ for (;;) {
+ switch (yytoken) {
+ case '(':
+ push(FTN);
+ if (gettok() == ')')
+ gettok();
+ else
+ /* TODO: prototyped function */;
+ continue;
+ case '[':
+ push(ARY);
+ if (gettok() == ']')
+ gettok();
+ else
+ /* TODO: specify size of array */;
+ continue;
+ default:
+ printf("leaving dirdcl %c\n", yytoken);
+ return;
+ }
+ }
+}
+
+
+
+
+
+/*
+ *
+ */
+
+struct type *types[][2] = {{T_VOID, NULL},
+ {T_SCHAR, T_UCHAR},
+ {T_SHORT, T_USHORT},
+ {T_INT, T_UINT},
+ {T_LONG, T_ULONG},
+ {T_LLONG, T_ULLONG},
+ {T_FLOAT, NULL},
+ {T_DOUBLE, NULL},
+ {T_LDOUBLE, NULL}};
+
+#define F_VOID 0
+#define F_CHAR 1
+#define F_SHORT 2
+#define F_INT 3
+#define F_LONG 4
+#define F_LLONG 5
+#define F_FLOAT 6
+#define F_DOUBLE 7
+#define F_LDOUBLE 8
+
+struct type *specifier(void)
+{
+	static char sign, sclass, tqlf, nt;
+	struct type *t = NULL;
+
+	tqlf = sign = sclass = 0;
+	for (;;) {
+		switch (gettok()) {
+		case TYPEDEF:case EXTERN:case STATIC:case AUTO:case REGISTER:
+			if (sclass != 0)
+				error("Two or more storage specifier");
+			sclass = yytoken;
+			continue;
+		case CONST: case VOLATILE: case RESTRICTED:
+			/* TODO */
+			continue;
+		case VOID: nt = F_VOID; goto check_type;
+		case CHAR: nt = F_CHAR; goto check_type;
+		case SHORT: nt = F_SHORT; goto check_type;
+		case INT: nt = F_INT; goto check_type;
+		case FLOAT: nt = F_FLOAT; goto check_type;
+		case DOUBLE: nt = F_DOUBLE; goto check_type;
+		case LONG: nt = F_LONG; goto check_type;
+		case SIGNED: case UNSIGNED:
+			if (sign != 0) {
+				error((sign != yytoken) ?
+				      "signed and unsigned in declaration" :
+				      "duplicated %s", yytext);
+			}
+			sign = yytoken;
+			if (t == NULL)
+				continue; /* we don't have type now */
+			goto check_type;
+		case STRUCT: /* TODO */
+		case UNION: /* TODO */
+		case ENUM: /* TODO */
+		case IDENTIFIER:
+			/* TODO */
+		default:
+			return t;	/* first token that is not a specifier ends the list */
+		}
+	check_type:
+		if (nt == F_LONG) {
+			if (t == NULL ||
+			    t == T_INT || t == T_UINT) {
+				/* nothing: plain 'long' or 'int long' */;
+			} else if (t == T_LONG || t == T_ULONG) {
+				nt = F_LLONG;	/* a second 'long' selects long long */
+			} else if (t == T_DOUBLE) {
+				nt = F_LDOUBLE;
+			} else if (t == T_LLONG || t == T_ULLONG) {
+				error("'long long long' is too long");
+			} else if (t == T_LDOUBLE) {
+				error("'long long double' is too long");
+			} else {
+				goto two_or_more_btype;
+			}
+		} else if (t != NULL) {
+			goto two_or_more_btype;
+		} if (nt == F_VOID && sign != 0) {
+			goto incorrect_sign;
+		} if (nt == F_CHAR && sign == 0) {
+			t = T_UCHAR; /* char by default is unsigned */
+		} else if (!(t = types[nt][sign == UNSIGNED])) {
+			goto incorrect_sign;	/* table has no unsigned variant of this type */
+		}
+	}
+two_or_more_btype:
+	error("two or more basic types");
+incorrect_sign:
+	error("sign specifier applied to incorrect type");
+}
+
+
+#undef F_VOID
+#undef F_CHAR
+#undef F_SHORT
+#undef F_INT
+#undef F_LONG
+#undef F_LLONG
+#undef F_FLOAT
+#undef F_DOUBLE
+#undef F_LDOUBLE
+
+
+void decl(void)
+{
+ unsigned char ns = 0;
+ unsigned char qlf[PTRLEVEL_MAX];
+
+ puts("decl");
+ for (ns = 0; yytoken == '*'; ns++) {
+ if (ns == PTRLEVEL_MAX)
+ error("Too much indirection levels");
+ switch (gettok()) {
+ case CONST:
+ if (!(qlf[ns] ^= 2))
+ goto duplicated;
+ continue;
+ case RESTRICTED:
+ if (!(qlf[ns] ^= 4))
+ goto duplicated;
+ continue;
+ case VOLATILE:
+ if (!(qlf[ns] ^= 8))
+ goto duplicated;
+ continue;
+ }
+ }
+ dirdcl();
+
+ if (ns)
+ push(PTR); /* TODO: pointer qualifiers */
+ printf("leaving dcl %c\n", yytoken);
+ return;
+
+duplicated:
+ error("duplicated '%s'", yytext);
+}
+
+
+
+void declaration(void)
+{
+	struct type *base, *t;
+
+	base = specifier();
+	for (; ; gettok()) {
+		decl();
+		if (yytoken != ',' && yytoken != ';')
+			error("unexpected '%s'", yytext);
+		/* each declarator is built from the specifier type, not from the previous declarator */
+		for (t = base; !empty(); )
+			t = mktype(t, pop());
+		ptype(t);
+
+		if (yytoken == ',')
+			/* add variable */;
+		else if (yytoken == ';') {
+			/* end of sentence */;
+			return;
+		}
+	}
+}
+
+
+#if 0
+void specdcl(void)
+{
+ struct spec_type t = {0, 0, 0};
+
+repeat:
+ parser_out_home = 1;
+ switch (gettok()) {
+ case TYPEDEF:
+ case EXTERN:
+ case STATIC:
+ case AUTO:
+ case REGISTER:
+ case CONST:
+ case VOLATILE:
+ case SIGNED:
+ case UNSIGNED:
+ if (!(t.mods ^= MODIFIER(tok)))
+ error("duplicate '%s'", yytext);
+ goto repeat;
+ case IDENTIFIER:
+ /* This is incorrect!!! */
+ t.type = TYPE(INT);
+ if (user_opt.implicit_int) {
+ warning_error(user_opt.c99,
+ "type defaults to ‘int’ in declaration"
+ " of", yytext);
+ }
+ if (gettok() != ';')
+ goto non_end_after_id;
+ return ';';
+ case VOID:
+ case CHAR:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ if (!(t.type ^= (1 << TYPE(tok))))
+ error("duplicate '%s'", yytext);
+
+ case STRUCT:
+ case UNION:
+ case ENUM:
+ case TYPE_NAME:
+ ;
+ }
+
+
+non_end_after_id:
+ error("';' expected");
+}
+#endif
+
+
+
+
+#include <stddef.h>
+
+
+int main(int argc, char *argv[])
+{
+ init_lex();
+
+ open_file(NULL);
+ declaration();
+
+ return 0;
+}
--- /dev/null
+++ b/error.c
@@ -1,0 +1,57 @@
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "cc.h"
+
+
+
+
+static void warning_error_helper(char flag, const char *fmt, va_list va)
+{
+ fprintf(stderr, "%s:%s:%u:%u: ",
+ (!flag) ? "warning" : "error", filename, linenum, columnum);
+ vfprintf(stderr, fmt, va);
+ putc('\n', stderr);
+ if (flag)
+ exit(EXIT_FAILURE); /* TODO: uhmmmm */
+}
+
+
+void warning_error(char flag, const char *fmt, ...)
+{
+ va_list va;
+ va_start(va, fmt);
+ warning_error_helper(flag, fmt, va);
+ va_end(va);
+}
+
+
+void error(const char *fmt, ...)
+{
+ va_list va;
+ va_start(va, fmt);
+ warning_error_helper(1, fmt, va);
+ va_end(va);
+}
+
+
+void warning(const char *fmt, ...)
+{
+ va_list va;
+ va_start(va, fmt);
+ warning_error_helper(0, fmt, va);
+ va_end(va);
+}
+
+
+
+void die(const char *fmt, ...)	/* print a printf-style message on stderr, then exit with failure */
+{
+	va_list va;
+	va_start(va, fmt);
+	vfprintf(stderr, fmt, va);	/* a va_list must go to the v* variant, not fprintf */
+	va_end(va);
+	exit(EXIT_FAILURE);
+}
--- /dev/null
+++ b/lex.c
@@ -1,0 +1,185 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "symbol.h"
+#include "tokens.h"
+
+
+#define TOKSIZ_MAX 21
+#define NR_KWD_HASH 32
+/* TODO: move hashfun here */
+
+static struct keyword {
+ char *str;
+ unsigned char tok;
+ struct keyword *next;
+} keywords [] = {"auto", AUTO, NULL,
+ "break", BREAK, NULL,
+ "_Bool", CHAR, NULL,
+ "case", CASE, NULL,
+ "char", CHAR, NULL,
+ "const", CONST, NULL,
+ "continue", CONTINUE, NULL,
+ "default", DEFAULT, NULL,
+ "do", DO, NULL,
+ "double", DOUBLE, NULL,
+ "else", ELSE, NULL,
+ "enum", ENUM, NULL,
+ "extern", EXTERN, NULL,
+ "float", FLOAT, NULL,
+ "for", FOR, NULL,
+ "goto", GOTO, NULL,
+ "if", IF, NULL,
+ "int", INT, NULL,
+ "long", LONG, NULL,
+ "register", REGISTER, NULL,
+ "restricted", RESTRICTED, NULL,
+ "return", RETURN, NULL,
+ "short", SHORT, NULL,
+ "signed", SIGNED, NULL,
+ "sizeof", SIZEOF, NULL,
+ "static", STATIC, NULL,
+ "struct", STRUCT, NULL,
+ "switch", SWITCH, NULL,
+ "typedef", TYPEDEF, NULL,
+ "union", UNION, NULL,
+ "unsigned", UNSIGNED, NULL,
+ "void", VOID, NULL,
+ "volatile", VOLATILE, NULL,
+ "while", WHILE, NULL,
+ NULL, 0, NULL
+};
+
+static struct keyword *khash[NR_KWD_HASH];
+static FILE *yyin;
+
+unsigned char yytoken;
+unsigned char yyhash;
+size_t yylen;
+char yytext[TOKSIZ_MAX + 1];
+unsigned linenum;
+unsigned columnum;
+const char *filename;
+
+
+union yyval {
+ struct symbol *sym;
+} yyval;
+
+
+
+void init_lex(void)
+{
+	register struct keyword *bp;
+	static unsigned char h;
+
+	/*
+	 * Link each entry of keywords[] into its khash[] bucket, keeping
+	 * every chain in descending strcmp() order.  Walking the link
+	 * fields themselves means insertion at the head of a chain (or
+	 * into an empty bucket) needs no special case.
+	 */
+	for (bp = keywords; bp->str; bp++) {
+		register struct keyword **link;
+		h = hashfun(bp->str);
+		link = &khash[h];
+		while (*link && strcmp(bp->str, (*link)->str) < 0)
+			link = &(*link)->next;
+		bp->next = *link;
+		*link = bp;
+	}
+}
+
+static unsigned char iden(void)
+{
+ register struct keyword *kwp;
+ register char ch;
+ register char *bp = yytext;
+
+ for (yyhash = 0; bp < yytext + TOKSIZ_MAX; *bp++ = ch) {
+ if (!isalnum(ch = getc(yyin)) && ch != '_')
+ break;
+ yyhash += ch;
+ }
+ if (bp == yytext + TOKSIZ_MAX)
+ error("identifier too long %s", yytext);
+ ungetc(ch, yyin);
+ *bp = '\0';
+ yylen = bp - yytext;
+ yyhash &= NR_KWD_HASH - 1;
+ for (kwp = khash[yyhash]; kwp; kwp = kwp->next) {
+ if (!strcmp(kwp->str, yytext))
+ return kwp->tok;
+ }
+ return IDENTIFIER;
+}
+
+
+
+unsigned char gettok(void)
+{
+ static unsigned int c;
+ register unsigned char ch;
+ extern char parser_out_home;
+
+ while (isspace(c = getc(yyin)))
+ /* nothing */;
+ if (c == EOF) {
+ if (parser_out_home)
+ error("Find EOF while parsing");
+ else
+ return EOFTOK;
+ }
+ ch = c;
+ if (isalpha(ch) || ch == '_') {
+ ungetc(ch, yyin);
+ ch = iden();
+ } else if (isdigit(ch)) {
+ ;
+ } else {
+ switch (ch) {
+ case '&': case '|':
+ if ((c = getc(yyin)) == ch) {
+ ch |= 0x80; /* TODO */
+ break;
+ } else {
+ ungetc(c, yyin);
+ }
+ case '^': case '=': case '<': case '>':
+ case '*': case '+': case '-': case '/':
+ if ((c = getc(yyin)) == '=') {
+ ch |= 0x80; /* TODO */
+ break;
+ } else {
+ ungetc(c, yyin);
+ }
+ case ';': case '{': case '}': case '(': case ')': case '~':
+ case '!': case ',': case '?': case '[': case ']': case ':':
+ break;
+ default:
+ error("Incorrect character '%02x", c);
+ }
+ }
+
+return_token:
+ printf("Token = %c (%u)\n", (isprint(ch)) ? ch : ' ', (unsigned) ch);
+ return yytoken = ch;
+}
+
+
+void open_file(const char *file)
+{
+ if (yyin != NULL)
+ fclose(yyin);
+ if (file == NULL) {
+ yyin = stdin;
+ filename = "(stdin)";
+ return;
+ }
+ if ((yyin = fopen(file, "r")) == NULL)
+ die("file '%s' not found", file);
+ filename = file;
+}
--- /dev/null
+++ b/symbol.c
@@ -1,0 +1,105 @@
+
+
+#include <stddef.h>
+
+#include "symbol.h"
+
+#define NR_SYM_HASH 32
+
+struct symhash {
+ struct symbol *buf[NR_SYM_HASH];
+ struct symbol *top;
+};
+
+
+struct symctx {
+ struct symbol *siden;
+ struct symbol *sstruct;
+ struct symbol *sgoto;
+ struct symctx *next;
+};
+
+
+
+static struct symctx global_ctx;
+static struct symctx *ctxp = &global_ctx;
+struct symhash siden, sgoto, sstruct;
+
+
+
+unsigned char hashfun(register const char *s)
+{
+ register unsigned char h, ch;
+
+ for (h = 0; ch = *s++; h += ch)
+ /* nothing */;
+ return h & NR_SYM_HASH - 1;
+}
+
+
+
+
+void new_ctx(struct symctx *ctx)
+{
+ ctx->siden = siden.top;
+ ctx->sstruct = sstruct.top;
+ ctx->sgoto = sgoto.top;
+ ctx->next = ctxp;
+ ctxp = ctx;
+}
+
+
+/*
+ * WARNING: This function is not portable and waits that incremental calls
+ * to alloca return decremented address
+ */
+static void del_hash_ctx(struct symhash *h, struct symbol *const top)
+{
+ register struct symbol **bp;
+ static struct symbol **lim;
+
+ lim = h->buf + NR_SYM_HASH;
+ for (bp = h->buf; bp < lim; bp++) {
+ register struct symbol *aux;
+ for (aux = *bp; aux < top; *bp = aux = aux->next)
+ if (aux == h->top)
+ h->top = aux;
+ }
+}
+
+
+void del_ctx(void)
+{
+ del_hash_ctx(&siden, ctxp->siden);
+ del_hash_ctx(&sstruct, ctxp->sstruct);
+ del_hash_ctx(&sgoto, ctxp->sgoto); /* TODO: correct handling in goto */
+}
+
+
+
+
+struct symbol *pushsym(struct symhash *h, struct symbol *sym)
+{
+ static unsigned char key;
+ key = hashfun(sym->str);
+
+ h->top = sym;
+ sym->next = h->buf[key];
+ return h->buf[key] = sym;
+}
+
+
+
+
+struct symbol *findsym(struct symhash *h, char *s)
+{
+ register struct symbol *bp;
+ static unsigned char key;
+
+ key = hashfun(s);
+ for (bp = h->buf[key]; bp; bp = bp->next) {
+ if (!strcmp(bp->str, s))
+ return bp;
+ }
+ return NULL;
+}
--- /dev/null
+++ b/symbol.h
@@ -1,0 +1,22 @@
+
+#pragma once
+#ifndef SYMBOL_H
+#define SYMBOL_H
+
+
+struct type;
+
+struct symbol {
+ char *str;
+ struct type *type;
+ struct symbol *next;
+};
+
+
+struct symhash;
+extern struct symhash siden, sgoto, sstruct;
+
+extern unsigned char hashfun(register const char *s);
+
+
+#endif
--- /dev/null
+++ b/tokens.h
@@ -1,0 +1,38 @@
+#ifndef TOKENS_H
+#define TOKENS_H
+
+/* Don't change this codification because program used it!!! */
+enum {
+ /* types */
+ INT = 1, CHAR, FLOAT, LONG, LLONG, SHORT, VOID, DOUBLE,
+ LDOUBLE, STRUCT, UNION, ENUM, UTYPE, BOOL,
+ /* storage specifier */
+ TYPEDEF, EXTERN, STATIC, AUTO, REGISTER,
+ /* type qualifier */
+ VOLATILE, CONST, RESTRICTED,
+ /* sign specifier */
+ UNSIGNED, SIGNED
+};
+
+
+
+
+enum {
+ IDENTIFIER = 128, CONSTANT, STRING_LITERAL, SIZEOF,
+ PTR_OP, INC_OP, DEC_OP, LEFT_OP, RIGHT_OP, LE_OP, GE_OP, EQ_OP, NE_OP,
+ AND_OP, OR_OP, MUL_ASSIGN, DIV_ASSIGN, MOD_ASSIGN, ADD_ASSIGN,
+ SUB_ASSIGN, LEFT_ASSIGN, RIGHT_ASSIGN, AND_ASSIGN,
+ XOR_ASSIGN, OR_ASSIGN, TYPE_NAME,
+ ELLIPSIS,
+ CASE, DEFAULT, IF, ELSE, SWITCH, WHILE, DO, FOR, GOTO,
+ CONTINUE, BREAK, RETURN, EOFTOK
+};
+
+extern char yytext[];
+extern unsigned char yyhash;
+extern size_t yylen;
+extern unsigned char yytoken;
+
+extern unsigned char gettok(void);
+extern void init_lex(void);
+#endif
--- /dev/null
+++ b/types.c
@@ -1,0 +1,41 @@
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "types.h"
+
+#define xcalloc calloc
+
+struct type tschar, tuchar; /* signed char, unsigned char */
+struct type tshort, tushort; /* short, unsigned short */
+struct type tint, tuint; /* int, unsigned int */
+struct type tfloat, tdouble, tldouble; /* float, double, long double */
+struct type tlong, tulong; /* long, unsgined long */
+struct type tllong, tullong; /* long long, unsigned long long */
+struct type tvoid; /* void */
+
+
+struct type *mktype(register struct type *base, unsigned char op)
+{
+ register struct type **ptr, *nt;
+ assert(op == PTR || op == ARY || op == FTN);
+
+ switch (op) {
+ case PTR:
+ ptr = &base->ptr;
+ break;
+ case ARY:
+ ptr = &base->ary;
+ break;
+ case FTN:
+ ptr = &base->ftn;
+ break;
+ }
+ if (*ptr) return *ptr;
+
+ nt = xcalloc(sizeof(*base), 1);
+ *ptr = nt;
+ nt->op = op;
+ nt->base = base;
+ return nt;
+}
--- /dev/null
+++ b/types.h
@@ -1,0 +1,46 @@
+#ifndef TYPES_H_
+#define TYPES_H_
+
+
+struct type {
+ unsigned char op;
+ struct type *base;
+ struct type *ary; /* array */
+ struct type *ptr; /* pointer */
+ struct type *ftn; /* function */
+ union {
+ size_t nelem;
+ } u;
+};
+
+
+extern struct type tschar, tuchar, tshort, tushort, tint, tuint;
+extern struct type tfloat, tdouble, tldouble, tlong;
+extern struct type tulong, tllong, tullong, tvoid;
+
+#define T_SCHAR (&tschar)
+#define T_UCHAR (&tuchar)
+#define T_SHORT (&tshort)
+#define T_USHORT (&tushort)
+#define T_INT (&tint)
+#define T_UINT (&tuint)
+#define T_FLOAT (&tfloat)
+#define T_DOUBLE (&tdouble)
+#define T_LDOUBLE (&tldouble)
+#define T_LONG (&tlong)
+#define T_ULONG (&tulong)
+#define T_LLONG (&tllong)
+#define T_ULLONG (&tullong)
+#define T_VOID (&tvoid)
+
+
+#define ARY 1
+#define PTR 2
+#define FTN 3
+#define T_CONST 8
+#define T_RESTRICTED 16
+#define T_VOLATILE 32
+
+struct type *mktype(register struct type *base, unsigned char op);
+
+#endif