shithub: scc

Download patch

ref: dfff0083986b3e3687728deb0b56ec7bc718f4fc
parent: fd262ec2520b41e58a86866076e2509d26ba61bf
author: Roberto E. Vargas Caballero <[email protected]>
date: Tue Sep 19 10:48:35 EDT 2017

[as] Add basic lexer

This is only a basic skeleton of the work that must be done in the lexer.
This work also suggests that it may be interesting to allow different
syntaxes depending on the target processor.

--- a/as/Makefile
+++ b/as/Makefile
@@ -6,7 +6,7 @@
 
 OBJ = main.o emit.o ins.o parser.o expr.o
 HDR = ../inc/scc.h as.h
-MOREFLAGS = $(AS_CFLAGS)
+MOREFLAGS = -I../inc/$(STD) $(AS_CFLAGS)
 
 all:
 
--- a/as/as.h
+++ b/as/as.h
@@ -116,7 +116,7 @@
 #endif
 
 /* expr.c */
-extern Node *expr(void);
+extern Node *expr(char *s);
 
 /*
  * Definition of global variables
--- a/as/expr.c
+++ b/as/expr.c
@@ -1,7 +1,9 @@
 static char sccsid[] = "@(#) ./as/node.c";
 
+#include <ctype.h>
 #include <string.h>
 
+#include <cstd.h>
 #include "../inc/scc.h"
 #include "as.h"
 
@@ -9,17 +11,20 @@
 
 enum tokens {
 	IDEN = 1,
+	NUMBER,
+	REG,
 	CHAR,
 	STRING,
 	SHL,
 	SHR,
 	GE,
-	LT,
+	LE,
 };
 
 static Alloc *arena;
 static int yytoken;
-static char *yytext;
+static char yytext[INTIDENTSIZ+1], *textp, *endp;
+static size_t yylen;
 
 #define accept(t) (yytoken == (t) ? next() : 0)
 
@@ -50,9 +55,97 @@
 }
 
 static int
+follow(int expect1, int expect2, int ifyes1, int ifyes2, int ifno)
+{
+	/*
+	 * Peek at the character right after the current one. When it is
+	 * expect1 or expect2, step textp onto it and return the matching
+	 * ifyes token; otherwise leave textp where it was and return ifno.
+	 */
+	int peek = textp[1];
+
+	if (peek == expect1 || peek == expect2) {
+		++textp;
+		return (peek == expect1) ? ifyes1 : ifyes2;
+	}
+	return ifno;
+}
+
+/*
+ * Copy the token delimited by textp and endp into yytext, clamping it
+ * to INTIDENTSIZ characters, record its length in yylen and move textp
+ * to the end of the token.
+ */
+static void
+tok2str(void)
+{
+	size_t len = endp - textp;
+
+	yylen = len;
+	if (len > INTIDENTSIZ) {
+		error("token too big");
+		yylen = len = INTIDENTSIZ;
+	}
+	memcpy(yytext, textp, len);
+	yytext[len] = '\0';
+	textp = endp;
+}
+
+/*
+ * Delimit an identifier starting at textp: endp is left on the first
+ * character that is not a letter, a digit, '_' or '.'.
+ * Always returns IDEN.
+ */
+static int
+iden(void)
+{
+	unsigned char c;
+
+	/*
+	 * The character is read as unsigned char because the <ctype.h>
+	 * functions are undefined for negative values other than EOF.
+	 */
+	for (endp = textp; (c = *endp) == '_' || c == '.' || isalnum(c); ++endp)
+		/* nothing */;
+	return IDEN;
+}
+
+/*
+ * Delimit a number starting at textp: endp is left on the first
+ * character that is not a hexadecimal digit. Always returns NUMBER.
+ */
+static int
+number(void)
+{
+	/* cast needed: isxdigit() is undefined for negative char values */
+	for (endp = textp; isxdigit((unsigned char) *endp); ++endp)
+		/* nothing */;
+	return NUMBER;
+}
+
+/*
+ * Delimit a character constant. textp points at the opening quote; on
+ * return endp is just past the closing quote, or on the terminating
+ * NUL when the constant is not closed. Always returns CHAR.
+ */
+static int
+character(void)
+{
+	/* stop at the end of the string so the scan cannot overrun it */
+	for (endp = textp+1; *endp != '\'' && *endp != '\0'; ++endp)
+		/* nothing */;
+	if (*endp == '\'')
+		++endp;		/* the closing quote belongs to the token */
+	else
+		error("unterminated character constant");
+	return CHAR;
+}
+
+/*
+ * Delimit a string literal. textp points at the opening double quote;
+ * on return endp is just past the closing quote, or on the terminating
+ * NUL when the literal is not closed. Always returns STRING.
+ */
+static int
+string(void)
+{
+	/* stop at the end of the string so the scan cannot overrun it */
+	for (endp = textp+1; *endp != '"' && *endp != '\0'; ++endp)
+		/* nothing */;
+	if (*endp == '"')
+		++endp;		/* the closing quote belongs to the token */
+	else
+		error("unterminated string");
+	return STRING;
+}
+
+/*
+ * Read the next token from textp. Leading white space is skipped, the
+ * token text is copied into yytext by tok2str() and the token code is
+ * stored in yytoken and returned. Single characters are returned as
+ * themselves; at the end of the input the code is 0 and yytext is empty.
+ */
+static int
 next(void)
 {
-	return 0;
+	int c;
+	char *start;
+
+	/* <ctype.h> functions are undefined for negative char values */
+	while (isspace((unsigned char) *textp))
+		++textp;
+	start = textp;
+	c = *textp;
+	if (isalpha((unsigned char) c) || c == '_' || c == '.') {
+		c = iden();
+	} else if (isdigit((unsigned char) c)) {
+		c = number();
+	} else if (c == '\'') {
+		c = character();
+	} else if (c == '"') {
+		c = string();
+	} else {
+		if (c == '>')
+			c = follow('=', '>', GE, SHR, '>');
+		else if (c == '<')
+			c = follow('=', '<', LE, SHL, '<');
+		/*
+		 * Nothing on this path sets endp, and follow() leaves textp
+		 * on the last character of the operator, so delimit the
+		 * token here. The terminating NUL gives an empty token and
+		 * is never stepped over.
+		 */
+		endp = (*textp != '\0') ? textp + 1 : textp;
+		textp = start;
+	}
+	tok2str();
+
+	/* accept() and the grammar functions read the token from yytoken */
+	return yytoken = c;
 }
 
 static void
@@ -73,6 +166,8 @@
 /* grammar functions                                                     */
 /*************************************************************************/
 
+static Node *or(void);
+
 static Node *
 primary(void)
 {
@@ -79,6 +174,7 @@
 	Node *np;
 
 	switch (yytoken) {
+	case NUMBER:
 	case IDEN:
 	case CHAR:
 	case STRING:
@@ -87,7 +183,7 @@
 		break;
 	case '(':
 		next();
-		np = expr();
+		np = or();
 		expect(')');
 		break;
 	default:
@@ -152,7 +248,7 @@
 		case '>':
 		case '=':
 		case GE:
-		case LT:
+		case LE:
 			next();
 			np = binary(op, np, add());
 			break;
@@ -184,8 +280,8 @@
 	return np;
 }
 
-Node *
-expr(void)
+static Node *
+or(void)
 {
 	int op;
 	Node *np;
@@ -201,4 +297,18 @@
 		default: return np;
 		}
 	}
+}
+
+/*
+ * Parse the expression held in the string s and return its tree.
+ * An error is reported when the lexer position is not at the end of
+ * the string once parsing has finished.
+ */
+Node *
+expr(char *s)
+{
+	Node *tree;
+
+	textp = s;
+	next();
+	tree = or();
+	if (*textp == '\0')
+		return tree;
+
+	error("trailing characters in expression '%s'", textp);
+	return tree;
 }