ref: 1ffd5f6ee169b5d76958348b3376d82a66a75d63
dir: /lex.c/
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include "cc.h" #include "symbol.h" #include "tokens.h" #define NR_KWD_HASH 32 static struct keyword { char *str; unsigned char tok; struct keyword *next; } keywords [] = {"auto", AUTO, NULL, "break", BREAK, NULL, "_Bool", CHAR, NULL, "case", CASE, NULL, "char", CHAR, NULL, "const", CONST, NULL, "continue", CONTINUE, NULL, "default", DEFAULT, NULL, "do", DO, NULL, "double", DOUBLE, NULL, "else", ELSE, NULL, "enum", ENUM, NULL, "extern", EXTERN, NULL, "float", FLOAT, NULL, "for", FOR, NULL, "goto", GOTO, NULL, "if", IF, NULL, "int", INT, NULL, "long", LONG, NULL, "register", REGISTER, NULL, "restricted", RESTRICTED, NULL, "return", RETURN, NULL, "short", SHORT, NULL, "signed", SIGNED, NULL, "sizeof", SIZEOF, NULL, "static", STATIC, NULL, "struct", STRUCT, NULL, "switch", SWITCH, NULL, "typedef", TYPEDEF, NULL, "union", UNION, NULL, "unsigned", UNSIGNED, NULL, "void", VOID, NULL, "volatile", VOLATILE, NULL, "while", WHILE, NULL, NULL, 0, NULL }; static struct keyword *khash[NR_KWD_HASH]; static FILE *yyin; union yyval yyval; unsigned char yytoken; unsigned char yyhash; char yytext[TOKSIZ_MAX + 1]; unsigned linenum; unsigned columnum; const char *filename; static unsigned char hashfun(register const char *s) { register unsigned char h, ch; for (h = 0; ch = *s++; h += ch) /* nothing */; return h; } void init_lex(void) { register struct keyword *bp; static unsigned char h; for (bp = keywords; bp->str; bp++) { register struct keyword *aux, *ant; h = hashfun(bp->str) & (NR_KWD_HASH - 1); if (!(aux = khash[h]) || strcmp(bp->str, aux->str) < 0) { khash[h] = bp; bp->next = aux; continue; } for (ant = aux; aux; ant = aux, aux = aux->next) { if (strcmp(bp->str, aux->str) < 0) break; } ant->next = bp; bp->next = aux; } } static char number(void) { register char *bp; register char ch; for (bp = yytext; bp < yytext + TOKSIZ_MAX; *bp++ = ch) { if (!isdigit(ch = getc(yyin))) break; } if (bp == yytext + TOKSIZ_MAX) error("identifier too long %s", yytext); ungetc(ch, yyin); *bp = '\0'; return CONSTANT; } static unsigned char keyword(const char *s, unsigned char key) { register struct keyword *kwp; key &= NR_KWD_HASH - 1; for (kwp = khash[key]; kwp; kwp = kwp->next) { if (!strcmp(kwp->str, yytext)) return kwp->tok; } return 0; } static unsigned char iden(void) { register char ch; register char *bp = yytext; for (yyhash = 0; bp < yytext + TOKSIZ_MAX; *bp++ = ch) { if (!isalnum(ch = getc(yyin)) && ch != '_') break; yyhash += ch; } if (bp == yytext + TOKSIZ_MAX) error("identifier too long %s", yytext); *bp = '\0'; ungetc(ch, yyin); if (ch = keyword(yytext, yyhash)) return ch; return IDEN;; } static unsigned char skip(void) { register int c; extern char parser_out_home; while (isspace(c = getc(yyin))) { if (c == '\n') ++linenum, columnum = 1; else ++columnum; } if (c == EOF) { if (parser_out_home) error("Find EOF while parsing"); return 1; } ungetc(c, yyin); return 0; } static unsigned char follow(unsigned char op, unsigned char eq, unsigned char rep) { register char c; if ((c = getc(yyin)) == '=') return eq; else if (c == op && rep) return rep; ungetc(c, yyin); return op; } static unsigned char rel_shift(unsigned char op) { static char tokens[2][3] = { {GE, LSHIFT, LSHIFT_EQ}, {LE, RSHIFT, RSHIFT_EQ}}; register char c; register char *tp = tokens[op == '>']; if ((c = getc(yyin)) == '=') { return tp[0]; } else if (c == op) { if ((c = getc(yyin)) == '=') return tp[2]; op = tp[1]; } ungetc(c, yyin); return c; } static unsigned char minus(void) { register int c; switch (c = getc(yyin)) { case '-': return DEC; case '>': return PTR; case '=': return SUB_EQ; default: ungetc(c, yyin); return '-'; } } unsigned char next(void) { register unsigned char c; if (!skip()) c = EOFTOK; if (isalpha(c = getc(yyin)) || c == '_') { ungetc(c, yyin); c = iden(); } else if (isdigit(c)) { ungetc(c, yyin); c = number(); } else { switch (c) { case '=': c = follow('=', EQ, 0); break; case '^': c = follow('^', XOR_EQ, 0); break; case '*': c = follow('*', MUL_EQ, 0); break; case '!': c = follow('!', NE, 0); break; case '+': c = follow('+', ADD_EQ, INC); break; case '&': c = follow('&', AND_EQ, AND); break; case '|': c = follow('|', OR_EQ, OR); break; case '<': c = rel_shift('<'); break; case '>': c = rel_shift('>'); break; case '-': c = minus(); break; } } return yytoken = c; } char accept(unsigned char tok) { if (yytoken == tok) { next(); return 1; } return 0; } void expect(unsigned char tok) { if (yytoken != tok) error("unexpected %s", yytext); next(); } void open_file(const char *file) { if (yyin != NULL) fclose(yyin); if (file == NULL) { yyin = stdin; filename = "(stdin)"; return; } if ((yyin = fopen(file, "r")) == NULL) die("file '%s' not found", file); filename = file; columnum = linenum = 1; }