shithub: rgbds

Download patch

ref: 5ad7a9375076f1d352a9825379e0abee43c2fdfc
parent: 2ec10012b6633f5e23012330fd5faddcfea490b8
author: ISSOtm <[email protected]>
date: Tue Jul 28 21:06:53 EDT 2020

Add EQUS expansion

--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -228,10 +228,12 @@
 static_assert(LEXER_BUF_SIZE <= SSIZE_MAX);
 
 struct Expansion {
-	uint8_t distance; /* How far the expansion's beginning is from the current position */
+	struct Expansion *firstChild;
+	struct Expansion *next;
 	char const *contents;
 	size_t len;
-	struct Expansion *parent;
+	uint8_t distance; /* Distance between the beginning of this expansion and of its parent */
+	uint8_t skip; /* How many extra characters to skip after the expansion is over */
 };
 
 struct LexerState {
@@ -266,7 +268,8 @@
 
 	size_t nbChars; /* Number of chars of lookahead, for processing expansions */
 	bool expandStrings;
-	struct Expansion *expansion;
+	struct Expansion *expansions;
+	size_t expansionOfs; /* Offset into the current top-level expansion (negative = before) */
 };
 
 struct LexerState *lexerState = NULL;
@@ -349,7 +352,8 @@
 
 	state->nbChars = 0;
 	state->expandStrings = true;
-	state->expansion = NULL;
+	state->expansions = NULL;
+	state->expansionOfs = 0;
 	return state;
 }
 
@@ -453,6 +457,67 @@
 		fatalerror("realloc error while resizing capture buffer: %s\n", strerror(errno));
 }
 
+static struct Expansion *getExpansionAtDistance(size_t *distance)
+{
+	struct Expansion *expansion = lexerState->expansions;
+	struct Expansion *prevLevel = NULL; /* Top level has no "previous" level */
+
+	for (;;) {
+		/* Find the closest expansion whose end is after the target */
+		while (expansion && expansion->len - expansion->distance <= *distance) {
+			*distance -= expansion->skip;
+			expansion = expansion->next;
+		}
+
+		/* If there is none, or it begins after the target, return the previous level */
+		if (!expansion || expansion->distance > *distance)
+			return prevLevel;
+
+		/* We know we are inside of that expansion */
+		*distance -= expansion->distance; /* Distances are relative to their parent */
+
+		if (!expansion->firstChild) /* If there are no children, this is it */
+			return expansion;
+		/* Otherwise, register this expansion and repeat the process */
+		prevLevel = expansion;
+		expansion = expansion->firstChild;
+	}
+}
+
+static void beginExpansion(size_t distance, uint8_t skip, char const *str, size_t size)
+{
+	struct Expansion *parent = getExpansionAtDistance(&distance);
+	struct Expansion **insertPoint = parent ? &parent->firstChild : &lexerState->expansions;
+
+	/* We cannot be *inside* of any of these expansions, so just keep the list sorted */
+	while (*insertPoint && (*insertPoint)->distance < distance)
+		insertPoint = &(*insertPoint)->next;
+
+	*insertPoint = malloc(sizeof(**insertPoint));
+	if (!*insertPoint)
+		fatalerror("Unable to allocate new expansion: %s", strerror(errno));
+	(*insertPoint)->firstChild = NULL;
+	(*insertPoint)->next = NULL; /* Expansions are always performed left to right */
+	(*insertPoint)->contents = str;
+	(*insertPoint)->len = size;
+	(*insertPoint)->distance = distance;
+	(*insertPoint)->skip = skip;
+
+	/* If expansion is the new closest one, update offset */
+	if (insertPoint == &lexerState->expansions)
+		lexerState->expansionOfs = 0;
+}
+
+static void freeExpansion(struct Expansion *expansion)
+{
+	do {
+		struct Expansion *next = expansion->next;
+
+		free(expansion);
+		expansion = next;
+	} while (expansion);
+}
+
 /* If at any point we need more than 255 characters of lookahead, something went VERY wrong. */
 static int peek(uint8_t distance)
 {
@@ -460,6 +525,16 @@
 		fatalerror("Internal lexer error: buffer has insufficient size for peeking (%u >= %u)\n",
 			   distance, LEXER_BUF_SIZE);
 
+	size_t ofs = lexerState->expansionOfs + distance;
+	struct Expansion const *expansion = getExpansionAtDistance(&ofs);
+
+	if (expansion) {
+		assert(distance < expansion->len);
+		return expansion->contents[ofs];
+	}
+
+	distance = ofs - lexerState->expansionOfs;
+
 	if (lexerState->isMmapped) {
 		if (lexerState->offset + distance >= lexerState->size)
 			return EOF;
@@ -579,6 +654,42 @@
 		}
 	}
 
+	/*
+	 * The logic is as follows:
+	 * - Any characters up to the expansion need to be consumed in the file
+	 * - If some remain after that, advance the offset within the expansion
+	 * - If that goes *past* the expansion, then leftovers shall be consumed in the file
+	 * - If we went past the expansion, we're back to square one, and should re-do all
+	 */
+nextExpansion:
+	if (lexerState->expansions) {
+		/* If the read cursor reaches into the expansion, update offset */
+		if (distance > lexerState->expansions->distance) {
+			/* distance = <file chars (expansion distance)> + <expansion chars> */
+			lexerState->expansionOfs += distance - lexerState->expansions->distance;
+			distance = lexerState->expansions->distance; /* Nb chars to read in file */
+			/* Now, check if the expansion finished being read */
+			if (lexerState->expansionOfs >= lexerState->expansions->len) {
+				/* Add the leftovers to the distance */
+				distance += lexerState->expansionOfs - lexerState->expansions->len;
+				/* Also add in the post-expansion skip */
+				distance += lexerState->expansions->skip;
+				/* Move on to the next expansion */
+				struct Expansion *next = lexerState->expansions->next;
+
+				freeExpansion(lexerState->expansions);
+				lexerState->expansions = next;
+				/* Reset the offset for the next expansion */
+				lexerState->expansionOfs = 0;
+				/* And repeat, in case we also go into or over the next expansion */
+				goto nextExpansion;
+			}
+		}
+		/* Getting closer to the expansion */
+		lexerState->expansions->distance -= distance;
+		/* Now, `distance` is how many bytes to move forward **in the file** */
+	}
+
 	if (lexerState->isMmapped) {
 		lexerState->offset += distance;
 	} else {
@@ -1261,7 +1372,17 @@
 				if (tokenType != T_ID && tokenType != T_LOCAL_ID)
 					return tokenType;
 
-				/* TODO: attempt string expansion */
+				if (lexerState->expandStrings) {
+					/* Attempt string expansion */
+					struct Symbol const *sym = sym_FindSymbol(yylval.tzSym);
+
+					if (sym && sym->type == SYM_EQUS) {
+						char const *s = sym_GetStringValue(sym);
+
+						beginExpansion(0, 0, s, strlen(s));
+						continue; /* Restart, reading from the new buffer */
+					}
+				}
 
 				if (tokenType == T_ID && lexerState->atLineStart)
 					return T_LABEL;