shithub: libmujs

Download patch

ref: bb2279e43d905ca8383d5dec5eee57feec5462b1
parent: c816b1193efeabbeeddcf8c5324e92b895956dd5
author: Sebastian Rasmussen <[email protected]>
date: Mon Dec 23 14:47:12 EST 2013

Add lexer.

--- /dev/null
+++ b/Makefile
@@ -1,0 +1,25 @@
+SRCS := $(wildcard js*.c)
+HDRS := $(wildcard js*.h)
+OBJS := $(SRCS:%.c=build/%.o)
+
+CFLAGS = -Wall -g
+
+default: build js
+
+build:
+	mkdir -p build
+
+build/%.o : %.c $(HDRS) | build # order-only: build/ must exist before any object is compiled
+	$(CC) -c $< -o $@ $(CFLAGS)
+
+build/libjs.a: $(OBJS)
+	ar cru $@ $^
+
+js: build/main.o build/libjs.a
+	$(CC) -o $@ $^ -lm
+
+tags: $(wildcard *.c *.h) # depend on the sources so an existing tags file is regenerated
+	ctags *.c *.h
+
+clean:
+	rm -f build/* js
--- /dev/null
+++ b/js-lex.c
@@ -1,0 +1,468 @@
+#include "js.h"
+
+#define nelem(a) (sizeof (a) / sizeof (a)[0]) /* element count; only valid for true arrays, not pointers */
+
+struct { /* maps a reserved-word spelling to its token */
+	const char *string;
+	js_Token token;
+} keywords[] = { /* keep in strcmp() order: findkeyword() binary-searches this table */
+	{"abstract", JS_ABSTRACT},
+	{"boolean", JS_BOOLEAN},
+	{"break", JS_BREAK},
+	{"byte", JS_BYTE},
+	{"case", JS_CASE},
+	{"catch", JS_CATCH},
+	{"char", JS_CHAR},
+	{"class", JS_CLASS},
+	{"const", JS_CONST},
+	{"continue", JS_CONTINUE},
+	{"debugger", JS_DEBUGGER},
+	{"default", JS_DEFAULT},
+	{"delete", JS_DELETE},
+	{"do", JS_DO},
+	{"double", JS_DOUBLE},
+	{"else", JS_ELSE},
+	{"enum", JS_ENUM},
+	{"export", JS_EXPORT},
+	{"extends", JS_EXTENDS},
+	{"false", JS_FALSE},
+	{"final", JS_FINAL},
+	{"finally", JS_FINALLY},
+	{"float", JS_FLOAT},
+	{"for", JS_FOR},
+	{"function", JS_FUNCTION},
+	{"goto", JS_GOTO},
+	{"if", JS_IF},
+	{"implements", JS_IMPLEMENTS},
+	{"import", JS_IMPORT},
+	{"in", JS_IN},
+	{"instanceof", JS_INSTANCEOF},
+	{"int", JS_INT},
+	{"interface", JS_INTERFACE},
+	{"long", JS_LONG},
+	{"native", JS_NATIVE},
+	{"new", JS_NEW},
+	{"null", JS_NULL},
+	{"package", JS_PACKAGE},
+	{"private", JS_PRIVATE},
+	{"protected", JS_PROTECTED},
+	{"public", JS_PUBLIC},
+	{"return", JS_RETURN},
+	{"short", JS_SHORT},
+	{"static", JS_STATIC},
+	{"super", JS_SUPER},
+	{"switch", JS_SWITCH},
+	{"synchronized", JS_SYNCHRONIZED},
+	{"this", JS_THIS},
+	{"throw", JS_THROW},
+	{"throws", JS_THROWS},
+	{"transient", JS_TRANSIENT},
+	{"true", JS_TRUE},
+	{"try", JS_TRY},
+	{"typeof", JS_TYPEOF},
+	{"var", JS_VAR},
+	{"void", JS_VOID},
+	{"volatile", JS_VOLATILE},
+	{"while", JS_WHILE},
+	{"with", JS_WITH},
+};
+
+const char *tokenstrings[] = { /* printable names indexed by js_Token value; same order as the enum in js.h */
+	"ERROR", "EOF", "(identifier)", "null", "true", "false", "(number)",
+	"(string)", "(regexp)", "\\n", "{", "}", "(", ")", "[", "]", ".", ";",
+	",", "<", ">", "<=", ">=", "==", "!=", "===", "!==", "+", "-", "*",
+	"%", "++", "--", "<<", ">>", ">>>", "&", "|", "^", "!", "~", "&&",
+	"||", "?", ":", "=", "+=", "-=", "*=", "%=", "<<=", ">>=", ">>>=",
+	"&=", "|=", "^=", "/", "/=", "break", "case", "catch", "continue",
+	"default", "delete", "do", "else", "finally", "for", "function", "if",
+	"in", "instanceof", "new", "return", "switch", "this", "throw", "try",
+	"typeof", "var", "void", "while", "with", "abstract", "boolean",
+	"byte", "char", "class", "const", "debugger", "double", "enum",
+	"export", "extends", "final", "float", "goto", "implements", "import",
+	"int", "interface", "long", "native", "package", "private",
+	"protected", "public", "short", "static", "super", "synchronized",
+	"throws", "transient", "volatile",
+};
+
+const char *js_tokentostring(js_Token t) /* printable name of token t */
+{
+	return (size_t)t < nelem(tokenstrings) ? tokenstrings[t] : "(unknown)"; /* never index outside the table */
+}
+
+static inline js_Token findkeyword(const char *s)
+{
+	/* Binary search over keywords[], which is kept in strcmp() order. */
+	/* A spelling that is not in the table is a plain identifier. */
+	int lo = 0;
+	int hi = nelem(keywords) - 1;
+
+	while (lo <= hi) {
+		int mid = (lo + hi) >> 1;
+		int cmp = strcmp(s, keywords[mid].string);
+
+		if (cmp == 0)
+			return keywords[mid].token;
+		if (cmp > 0)
+			lo = mid + 1;
+		else
+			hi = mid - 1;
+	}
+
+	return JS_IDENTIFIER;
+}
+
+static inline int iswhite(int c) /* tab, vertical tab, form feed, space or 0xa0 (no-break space)? */
+{
+	return c == ' ' || c == '\t' || c == '\v' || c == '\f' || c == 0xa0;
+}
+
+static inline int isnewline(int c) /* LF, CR, U+2028 or U+2029? (parameter type is now explicit) */
+{
+	return c == 0xa || c == 0xd || c == 0x2028 || c == 0x2029;
+}
+
+#define GETC() ((unsigned char)*(*sp)++) /* next byte as 0..255 (so 0xa0 can match), then advance */
+#define UNGETC() ((*sp)--) /* step back one byte */
+#define LOOK(x) (**sp == (x) ? *(*sp)++ : 0) /* consume the next byte only if it equals x */
+
+static inline void lexlinecomment(const char **sp) /* skip the remainder of a line comment */
+{
+	int c = GETC();
+	while (c != 0 && !isnewline(c)) /* also stop at the terminator: the last line may lack a newline */
+		c = GETC();
+	UNGETC(); /* leave the newline (or NUL) for the caller to lex */
+}
+
+static inline int lexcomment(const char **sp) /* skip a block comment body; 0 if it was closed, -1 if the input ended first */
+{
+	int c = GETC();
+	while (c != 0) { /* NUL is checked for every byte, including the one that ends a run of '*' */
+		if (c == '*') {
+			while (c == '*')
+				c = GETC();
+			if (c == '/')
+				return 0;
+		} else
+			c = GETC();
+	}
+	return -1;
+}
+
+static inline int isidentifierstart(int c) /* ASCII letter, '$' or '_'? */
+{
+	return c == '_' || c == '$' || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
+}
+
+static inline int isidentifierpart(int c) /* identifier-start character or decimal digit? */
+{
+	return isidentifierstart(c) || ('0' <= c && c <= '9');
+}
+
+static inline int isdec(int c) /* ASCII decimal digit? */
+{
+	return '0' <= c && c <= '9';
+}
+
+static inline int ishex(int c) /* ASCII hexadecimal digit, either case? */
+{
+	return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f');
+}
+
+static inline int tohex(int c) /* numeric value of hex digit c; 0 for any other input */
+{
+	int lower = (c >= 'a' && c <= 'f');
+	int upper = (c >= 'A' && c <= 'F');
+
+	if (lower || upper)
+		return 10 + (lower ? c - 'a' : c - 'A');
+
+	return (c >= '0' && c <= '9') ? c - '0' : 0;
+}
+
+static inline js_Token lexhex(const char **sp, double *yynumber)
+{
+	/* Read one or more hex digits; *yynumber is written only when JS_NUMBER is returned. */
+	double value = 0;
+	int c = GETC();
+
+	if (!ishex(c))
+		return JS_ERROR;
+
+	while (ishex(c)) {
+		value = value * 16 + tohex(c);
+		c = GETC();
+	}
+
+	UNGETC(); /* un-read the byte that ended the number */
+	*yynumber = value;
+	return JS_NUMBER;
+}
+
+static inline double lexinteger(const char **sp)
+{
+	/* Accumulate a run of decimal digits into a double. */
+	/* An empty run consumes nothing and yields 0. */
+	double value = 0;
+	int c;
+
+	for (c = GETC(); isdec(c); c = GETC())
+		value = value * 10 + (c - '0');
+
+	UNGETC(); /* the first non-digit stays in the input */
+
+	return value;
+}
+
+static inline double lexfraction(const char **sp)
+{
+	/* Digits after the decimal point, returned as digits / 10^count (0 when there are none). */
+	double digits = 0;
+	double scale = 1;
+	int c;
+
+	for (c = GETC(); isdec(c); c = GETC()) {
+		digits = digits * 10 + (c - '0');
+		scale = scale * 10;
+	}
+
+	UNGETC(); /* the first non-digit stays in the input */
+
+	return digits / scale;
+}
+
+static inline js_Token lexnumber(int c, const char **sp, double *yynumber)
+{
+	/* c is the first character, already consumed. "0x"/"0X" is handed to lexhex(); */
+	/* otherwise c is pushed back and integer, ".fraction" and exponent parts are read. */
+	double whole, frac = 0, exponent = 0;
+
+	if (c == '0' && (LOOK('x') || LOOK('X')))
+		return lexhex(sp, yynumber);
+
+	UNGETC();
+	whole = lexinteger(sp);
+
+	if (LOOK('.'))
+		frac = lexfraction(sp);
+
+	if (LOOK('e') || LOOK('E')) {
+		int negative = LOOK('-') != 0;
+
+		if (!negative)
+			(void)LOOK('+'); /* an explicit plus sign is allowed and ignored */
+		exponent = lexinteger(sp);
+		if (negative)
+			exponent = -exponent;
+	}
+
+	*yynumber = (whole + frac) * pow(10, exponent);
+
+	return JS_NUMBER;
+}
+
+static inline int lexescape(const char **sp)
+{
+	/* Decode the escape after a backslash and return the character value it denotes; */
+	/* characters without a special meaning stand for themselves. */
+	int c = GETC();
+	int digits, value = 0;
+
+	switch (c) {
+	case 0: UNGETC(); return 0; /* backslash at end of input: leave the NUL for the caller */
+	case '0': return 0;
+	case 'u': digits = 4; break;
+	case 'x': digits = 2; break;
+	case '\'': return '\'';
+	case '"': return '"';
+	case '\\': return '\\';
+	case 'b': return '\b';
+	case 'f': return '\f';
+	case 'n': return '\n';
+	case 'r': return '\r';
+	case 't': return '\t';
+	case 'v': return '\v';
+	default: return c;
+	}
+
+	/* Stop at the first non-hex byte so a truncated u/x escape cannot run past the terminator. */
+	while (digits-- > 0 && ishex(**sp))
+		value = (value << 4) | tohex(GETC());
+	return value;
+}
+
+static inline js_Token lexstring(int q, const char **sp, char *yytext, size_t yylen)
+{
+	/* Copy a string literal body (opening quote q already consumed) into yytext, decoding escapes. */
+	char *p = yytext;
+	int c = GETC();
+
+	while (c != q) {
+		if (c == 0 || isnewline(c))
+			return JS_ERROR; /* literal not closed before end of line or input */
+
+		if (c == '\\')
+			c = lexescape(sp);
+
+		if ((size_t)(p - yytext) + 1 >= yylen) /* keep one byte for the terminator written below */
+			return JS_ERROR;
+		*p++ = c;
+		c = GETC();
+	}
+
+	*p = 0;
+	return JS_STRING;
+}
+
+js_Token js_lex(js_State *J, const char **sp, char *yytext, size_t yylen, double *yynumber)
+{
+	/*
+	 * Return the next token from *sp, advancing *sp past it. Identifier and
+	 * string text goes to yytext (at most yylen bytes, NUL included), numeric
+	 * values to *yynumber. Blanks and comments are skipped; J is unused.
+	 */
+	int c = GETC();
+
+	while (c) {
+		while (iswhite(c))
+			c = GETC();
+		if (c == 0)
+			break; /* trailing blanks: do not read past the terminator */
+
+		if (isnewline(c))
+			return JS_NEWLINE;
+
+		if (c == '/') {
+			c = GETC();
+			if (c == '/') {
+				lexlinecomment(sp);
+			} else if (c == '*') {
+				if (lexcomment(sp))
+					return JS_ERROR;
+			} else if (c == '=') {
+				return JS_SLASH_EQ;
+			} else {
+				UNGETC();
+				return JS_SLASH;
+			}
+			c = GETC(); /* restart after the comment, else its '*' is lexed as JS_STAR */
+			continue;
+		}
+
+		if (isidentifierstart(c)) {
+			char *p = yytext;
+
+			do {
+				if ((size_t)(p - yytext) + 1 >= yylen) /* room for the NUL below */
+					return JS_ERROR;
+				*p++ = c;
+				c = GETC();
+			} while (isidentifierpart(c));
+
+			UNGETC();
+			*p = 0;
+
+			return findkeyword(yytext);
+		}
+
+		/* a '.' not followed by a digit is the JS_PERIOD punctuator */
+		if (isdec(c) || (c == '.' && isdec(**sp)))
+			return lexnumber(c, sp, yynumber);
+
+		if (c == '\'' || c == '"')
+			return lexstring(c, sp, yytext, yylen);
+
+		switch (c) {
+		case '{': return JS_LCURLY;
+		case '}': return JS_RCURLY;
+		case '(': return JS_LPAREN;
+		case ')': return JS_RPAREN;
+		case '[': return JS_LSQUARE;
+		case ']': return JS_RSQUARE;
+		case '.': return JS_PERIOD;
+		case ';': return JS_SEMICOLON;
+		case ',': return JS_COMMA;
+
+		case '<':
+			if (LOOK('<')) {
+				if (LOOK('='))
+					return JS_LT_LT_EQ;
+				return JS_LT_LT;
+			}
+			if (LOOK('='))
+				return JS_LT_EQ;
+			return JS_LT;
+
+		case '>':
+			if (LOOK('>')) {
+				if (LOOK('>')) {
+					if (LOOK('='))
+						return JS_GT_GT_GT_EQ;
+					return JS_GT_GT_GT;
+				}
+				if (LOOK('='))
+					return JS_GT_GT_EQ;
+				return JS_GT_GT;
+			}
+			if (LOOK('='))
+				return JS_GT_EQ;
+			return JS_GT;
+
+		case '=':
+			if (LOOK('=')) {
+				if (LOOK('='))
+					return JS_EQ_EQ_EQ;
+				return JS_EQ_EQ;
+			}
+			return JS_EQ;
+
+		case '!':
+			if (LOOK('=')) {
+				if (LOOK('='))
+					return JS_EXCL_EQ_EQ;
+				return JS_EXCL_EQ;
+			}
+			return JS_EXCL;
+
+		case '+':
+			if (LOOK('+'))
+				return JS_PLUS_PLUS;
+			if (LOOK('='))
+				return JS_PLUS_EQ;
+			return JS_PLUS;
+
+		case '-':
+			if (LOOK('-'))
+				return JS_MINUS_MINUS;
+			if (LOOK('='))
+				return JS_MINUS_EQ;
+			return JS_MINUS;
+
+		case '*': return LOOK('=') ? JS_STAR_EQ : JS_STAR;
+		case '%': return LOOK('=') ? JS_PERCENT_EQ : JS_PERCENT;
+
+		case '&':
+			if (LOOK('&'))
+				return JS_AND_AND;
+			if (LOOK('='))
+				return JS_AND_EQ;
+			return JS_AND;
+
+		case '|':
+			if (LOOK('|'))
+				return JS_BAR_BAR;
+			if (LOOK('='))
+				return JS_BAR_EQ;
+			return JS_BAR;
+
+		case '^': return LOOK('=') ? JS_HAT_EQ : JS_HAT;
+		case '~': return JS_TILDE;
+		case '?': return JS_QUESTION;
+		case ':': return JS_COLON;
+		}
+
+		c = GETC();
+	}
+
+	UNGETC(); /* stay on the terminator so further calls keep returning JS_EOF */
+	return JS_EOF;
+}
--- /dev/null
+++ b/js-load.c
@@ -1,0 +1,59 @@
+#include "js.h"
+
+int js_loadstring(js_State *J, const char *source)
+{
+	/* Lex source up to end of input or the first error, printing one token per line; returns 0. */
+	char yytext[512];
+	double yynumber;
+	js_Token t;
+
+	for (;;) {
+		t = js_lex(J, &source, yytext, sizeof yytext, &yynumber);
+
+		switch (t) {
+		case JS_NUMBER: printf("%g\n", yynumber); break;
+		case JS_IDENTIFIER: printf("id:%s\n", yytext); break;
+		case JS_STRING: printf("'%s'\n", yytext); break;
+		default: printf("%s\n", js_tokentostring(t)); break;
+		}
+
+		if (t == JS_EOF || t == JS_ERROR)
+			return 0;
+	}
+}
+
+int js_loadfile(js_State *J, const char *filename)
+{
+	/* Read the whole file into a NUL-terminated buffer and hand it to js_loadstring(). */
+	FILE *f;
+	char *s;
+	long n;
+	int t;
+
+	f = fopen(filename, "rb"); /* binary mode: the ftell() size must equal what fread() delivers */
+	if (!f)
+		return js_error(J, "cannot open file: '%s'", filename);
+
+	if (fseek(f, 0, SEEK_END) != 0 || (n = ftell(f)) < 0 || fseek(f, 0, SEEK_SET) != 0) {
+		fclose(f); /* ftell() reports -1 on failure; never use that as a size */
+		return js_error(J, "cannot read data from file: '%s'", filename);
+	}
+
+	s = malloc((size_t)n + 1); /* add space for string terminator */
+	if (!s) {
+		fclose(f);
+		return js_error(J, "cannot allocate storage for file contents: '%s'", filename);
+	}
+
+	if (fread(s, 1, (size_t)n, f) != (size_t)n) {
+		free(s);
+		fclose(f);
+		return js_error(J, "cannot read data from file: '%s'", filename);
+	}
+
+	s[n] = 0; /* zero-terminate string containing file data */
+	t = js_loadstring(J, s);
+	free(s);
+	fclose(f);
+	return t;
+}
--- /dev/null
+++ b/js-state.c
@@ -1,0 +1,28 @@
+#include "js.h"
+
+js_State *js_newstate(void)
+{
+	/* Allocate a zero-filled state; returns NULL (instead of crashing) if the allocation fails. */
+	js_State *J = calloc(1, sizeof *J);
+	return J;
+}
+
+void js_close(js_State *J) /* release the memory of a state allocated by js_newstate() */
+{
+	free(J);
+}
+
+int js_error(js_State *J, const char *fmt, ...)
+{
+	/* Write "error: ", the printf-style message and a newline to stderr. */
+	/* J is not used; the return value is always 0. */
+	va_list args;
+
+	va_start(args, fmt);
+	fprintf(stderr, "error: ");
+	vfprintf(stderr, fmt, args);
+	fprintf(stderr, "\n");
+	va_end(args);
+
+	return 0;
+}
--- /dev/null
+++ b/js.h
@@ -1,0 +1,162 @@
+#ifndef js_h
+#define js_h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <math.h>
+
+typedef struct js_State js_State;
+
+typedef int (*js_CFunction)(js_State *J); /* NOTE(review): not used by any code in this patch */
+
+js_State *js_newstate(void); /* allocate a zero-filled state */
+void js_close(js_State *J); /* free a state */
+
+int js_error(js_State *J, const char *fmt, ...); /* print "error: <message>" to stderr; returns 0 */
+
+int js_loadstring(js_State *J, const char *s); /* lex s and print its tokens to stdout; returns 0 */
+int js_loadfile(js_State *J, const char *filename); /* read the file into memory and run js_loadstring() on it */
+
+/* private */
+
+/* Token kinds returned by js_lex(). tokenstrings[] in js-lex.c is indexed by these values: keep both in the same order. */
+
+enum js_Token
+{
+	JS_ERROR,
+	JS_EOF,
+
+	JS_IDENTIFIER,
+	JS_NULL,
+	JS_TRUE,
+	JS_FALSE,
+	JS_NUMBER,
+	JS_STRING,
+	JS_REGEXP,
+	JS_NEWLINE,
+
+	/* punctuators */
+	JS_LCURLY,
+	JS_RCURLY,
+	JS_LPAREN,
+	JS_RPAREN,
+	JS_LSQUARE,
+	JS_RSQUARE,
+	JS_PERIOD,
+	JS_SEMICOLON,
+	JS_COMMA,
+	JS_LT,
+	JS_GT,
+	JS_LT_EQ,
+	JS_GT_EQ,
+	JS_EQ_EQ,
+	JS_EXCL_EQ,
+	JS_EQ_EQ_EQ,
+	JS_EXCL_EQ_EQ,
+	JS_PLUS,
+	JS_MINUS,
+	JS_STAR,
+	JS_PERCENT,
+	JS_PLUS_PLUS,
+	JS_MINUS_MINUS,
+	JS_LT_LT,
+	JS_GT_GT,
+	JS_GT_GT_GT,
+	JS_AND,
+	JS_BAR,
+	JS_HAT,
+	JS_EXCL,
+	JS_TILDE,
+	JS_AND_AND,
+	JS_BAR_BAR,
+	JS_QUESTION,
+	JS_COLON,
+	JS_EQ,
+	JS_PLUS_EQ,
+	JS_MINUS_EQ,
+	JS_STAR_EQ,
+	JS_PERCENT_EQ,
+	JS_LT_LT_EQ,
+	JS_GT_GT_EQ,
+	JS_GT_GT_GT_EQ,
+	JS_AND_EQ,
+	JS_BAR_EQ,
+	JS_HAT_EQ,
+	JS_SLASH,
+	JS_SLASH_EQ,
+
+	/* keywords */
+	JS_BREAK,
+	JS_CASE,
+	JS_CATCH,
+	JS_CONTINUE,
+	JS_DEFAULT,
+	JS_DELETE,
+	JS_DO,
+	JS_ELSE,
+	JS_FINALLY,
+	JS_FOR,
+	JS_FUNCTION,
+	JS_IF,
+	JS_IN,
+	JS_INSTANCEOF,
+	JS_NEW,
+	JS_RETURN,
+	JS_SWITCH,
+	JS_THIS,
+	JS_THROW,
+	JS_TRY,
+	JS_TYPEOF,
+	JS_VAR,
+	JS_VOID,
+	JS_WHILE,
+	JS_WITH,
+
+	/* future reserved words */
+	JS_ABSTRACT,
+	JS_BOOLEAN,
+	JS_BYTE,
+	JS_CHAR,
+	JS_CLASS,
+	JS_CONST,
+	JS_DEBUGGER,
+	JS_DOUBLE,
+	JS_ENUM,
+	JS_EXPORT,
+	JS_EXTENDS,
+	JS_FINAL,
+	JS_FLOAT,
+	JS_GOTO,
+	JS_IMPLEMENTS,
+	JS_IMPORT,
+	JS_INT,
+	JS_INTERFACE,
+	JS_LONG,
+	JS_NATIVE,
+	JS_PACKAGE,
+	JS_PRIVATE,
+	JS_PROTECTED,
+	JS_PUBLIC,
+	JS_SHORT,
+	JS_STATIC,
+	JS_SUPER,
+	JS_SYNCHRONIZED,
+	JS_THROWS,
+	JS_TRANSIENT,
+	JS_VOLATILE,
+};
+typedef enum js_Token js_Token; /* after the definition: ISO C forbids naming an enum type before it is complete */
+
+struct js_State
+{
+	char yytext[512]; /* NOTE(review): unused in this patch; js_loadstring() lexes into its own local buffer */
+	int top; /* NOTE(review): not referenced by any code in this patch; purpose to be confirmed */
+};
+
+js_Token js_lex(js_State *J, const char **sp, char *yytext, size_t yylen, double *yynumber); /* next token from *sp; advances *sp */
+const char *js_tokentostring(js_Token t); /* printable name of a token */
+
+#endif
--- /dev/null
+++ b/main.c
@@ -1,0 +1,19 @@
+#include "js.h"
+
+int
+main(int argc, char **argv)
+{
+	js_State *J = js_newstate();
+	int i;
+
+	if (!J)
+		return 1; /* no interpreter state could be allocated */
+
+	for (i = 1; i < argc; i++) {
+		js_loadfile(J, argv[i]); /* each argument in turn, not argv[1] every time */
+		// js_run(J);
+	}
+
+	js_close(J);
+	return 0;
+}