ref: 3036b5859894be1bc4523d7e309fca5015728679
parent: 3a44cc7722315a305561cb89b63ecba674a6cdaa
parent: 2eca43cd2da5a4be21cada359bd5e152f1896453
author: Eldred Habert <[email protected]>
date: Sun Oct 4 12:45:47 EDT 2020
Merge pull request #557 from ISSOtm/new-lexer-electric-boogaloo New lexer 2 — Electric Boogaloo
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -46,7 +46,7 @@
add_definitions(/D_CRT_SECURE_NO_WARNINGS)
else()
if(DEVELOP)
- add_compile_options(-Werror -Wall -Wextra -pedantic
+ add_compile_options(-Werror -Wall -Wextra -pedantic -Wno-type-limits
-Wno-sign-compare -Wformat -Wformat-security -Wformat-overflow=2
-Wformat-truncation=1 -Wformat-y2k -Wswitch-enum -Wunused
-Wuninitialized -Wunknown-pragmas -Wstrict-overflow=5
--- a/Makefile
+++ b/Makefile
@@ -56,7 +56,6 @@
src/asm/asmy.o \
src/asm/charmap.o \
src/asm/fstack.o \
- src/asm/globlex.o \
src/asm/lexer.o \
src/asm/macro.o \
src/asm/main.o \
@@ -73,7 +72,7 @@
src/hashmap.o \
src/linkdefs.o
-src/asm/globlex.o src/asm/lexer.o src/asm/constexpr.o: src/asm/asmy.h
+src/asm/lexer.o src/asm/main.o: src/asm/asmy.h
rgblink_obj := \
src/link/assign.o \
@@ -187,7 +186,7 @@
# compilation and make the continuous integration infrastructure return failure.
develop:
- $Qenv $(MAKE) -j WARNFLAGS="-Werror -Wall -Wextra -Wpedantic \
+ $Qenv $(MAKE) -j WARNFLAGS="-Werror -Wall -Wextra -Wpedantic -Wno-type-limits \
-Wno-sign-compare -Wformat -Wformat-security -Wformat-overflow=2 \
-Wformat-truncation=1 -Wformat-y2k -Wswitch-enum -Wunused \
-Wuninitialized -Wunknown-pragmas -Wstrict-overflow=5 \
@@ -199,7 +198,7 @@
-fsanitize=unreachable -fsanitize=vla-bound \
-fsanitize=signed-integer-overflow -fsanitize=bounds \
-fsanitize=object-size -fsanitize=bool -fsanitize=enum \
- -fsanitize=alignment -fsanitize=null -DDEVELOP" CFLAGS="-g -O0"
+ -fsanitize=alignment -fsanitize=null -DDEVELOP" CFLAGS="-ggdb3 -O0"
# Targets for the project maintainer to easily create Windows exes.
# This is not for Windows users!
--- a/include/asm/asm.h
+++ b/include/asm/asm.h
@@ -24,15 +24,8 @@
#define MAXMACROARGS 99999
#define MAXINCPATHS 128
-extern int32_t nLineNo;
extern uint32_t nTotalLines;
extern uint32_t nIFDepth;
-extern bool skipElif;
-extern char tzCurrentFileName[_MAX_PATH + 1];
extern struct Section *pCurrentSection;
-extern bool oDontExpandStrings;
-
-size_t symvaluetostring(char *dest, size_t maxLength, char *sym,
- const char *mode);
#endif /* RGBDS_ASM_ASM_H */
--- a/include/asm/fstack.h
+++ b/include/asm/fstack.h
@@ -21,36 +21,59 @@
#include "types.h"
-struct MacroArgs;
+struct FileStackNode {
+ struct FileStackNode *parent; /* Pointer to parent node, for error reporting */
+ /* Line at which the parent context was exited; meaningless for the root level */
+ uint32_t lineNo;
-struct sContext {
- YY_BUFFER_STATE FlexHandle;
- struct Symbol const *pMacro;
- struct sContext *next;
- char tzFileName[_MAX_PATH + 1];
- struct MacroArgs *macroArgs;
- uint32_t uniqueID;
- int32_t nLine;
- uint32_t nStatus;
- FILE *pFile;
- char *pREPTBlock;
- uint32_t nREPTBlockCount;
- uint32_t nREPTBlockSize;
- int32_t nREPTBodyFirstLine;
- int32_t nREPTBodyLastLine;
+ struct FileStackNode *next; /* Next node in the output linked list */
+ bool referenced; /* If referenced, don't free! */
+ uint32_t ID; /* Set only if referenced: ID within the object file, -1 if not output yet */
+
+ enum {
+ NODE_REPT,
+ NODE_FILE,
+ NODE_MACRO,
+ } type;
};
-extern unsigned int nMaxRecursionDepth;
+struct FileStackReptNode { /* NODE_REPT */
+ struct FileStackNode node;
+ uint32_t reptDepth;
+ /* WARNING: if changing this type, change overflow check in `fstk_Init` */
+ uint32_t iters[]; /* REPT iteration counts since last named node, in reverse depth order */
+};
-void fstk_RunInclude(char *tzFileName);
-void fstk_Init(char *s);
-void fstk_Dump(void);
-void fstk_DumpToStr(char *buf, size_t len);
-void fstk_DumpStringExpansions(void);
-void fstk_AddIncludePath(char *s);
-void fstk_RunMacro(char *s, struct MacroArgs *args);
-void fstk_RunRept(uint32_t count, int32_t nReptLineNo);
-FILE *fstk_FindFile(char const *fname, char **incPathUsed);
-int32_t fstk_GetLine(void);
+struct FileStackNamedNode { /* NODE_FILE, NODE_MACRO */
+ struct FileStackNode node;
+ char name[]; /* File name for files, file::macro name for macros */
+};
+
+extern size_t nMaxRecursionDepth;
+
+struct MacroArgs;
+
+void fstk_Dump(struct FileStackNode const *node, uint32_t lineNo);
+void fstk_DumpCurrent(void);
+struct FileStackNode *fstk_GetFileStack(void);
+/* The lifetime of the returned chars is until reaching the end of that file */
+char const *fstk_GetFileName(void);
+
+void fstk_AddIncludePath(char const *s);
+/**
+ * @param path The user-provided file name
+ * @param fullPath The address of a pointer, which will be made to point at the full path
+ * The pointer's value must be a valid argument to `realloc`, including NULL
+ * @param size Current size of the buffer, or 0 if the pointer is NULL
+ * @return True if the file was found, false if no path worked
+ */
+bool fstk_FindFile(char const *path, char **fullPath, size_t *size);
+
+bool yywrap(void);
+void fstk_RunInclude(char const *path);
+void fstk_RunMacro(char const *macroName, struct MacroArgs *args);
+void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char *body, size_t size);
+
+void fstk_Init(char const *mainPath, size_t maxRecursionDepth);
#endif /* RGBDS_ASM_FSTACK_H */
--- a/include/asm/lexer.h
+++ b/include/asm/lexer.h
@@ -9,78 +9,65 @@
#ifndef RGBDS_ASM_LEXER_H
#define RGBDS_ASM_LEXER_H
-#include <stdint.h>
-#include <stdio.h>
-
-#define LEXHASHSIZE (1 << 11)
#define MAXSTRLEN 255
-struct sLexInitString {
- char *tzName;
- uint32_t nToken;
-};
+struct LexerState;
+extern struct LexerState *lexerState;
+extern struct LexerState *lexerStateEOL;
-struct sLexFloat {
- uint32_t (*Callback)(char *s, uint32_t size);
- uint32_t nToken;
-};
+static inline struct LexerState *lexer_GetState(void)
+{
+ return lexerState;
+}
-struct yy_buffer_state {
- /* Actual starting address */
- char *pBufferRealStart;
- /* Address where the data is initially written after a safety margin */
- char *pBufferStart;
- char *pBuffer;
- size_t nBufferSize;
- uint32_t oAtLineStart;
-};
+static inline void lexer_SetState(struct LexerState *state)
+{
+ lexerState = state;
+}
-enum eLexerState {
- LEX_STATE_NORMAL,
- LEX_STATE_MACROARGS
-};
+static inline void lexer_SetStateAtEOL(struct LexerState *state)
+{
+ lexerStateEOL = state;
+}
-struct sStringExpansionPos {
- char *tzName;
- char *pBuffer;
- char *pBufferPos;
- struct sStringExpansionPos *pParent;
-};
+extern char const *binDigits;
+extern char const *gfxDigits;
-#define INITIAL 0
-#define macroarg 3
+static inline void lexer_SetBinDigits(char const *digits)
+{
+ binDigits = digits;
+}
-typedef struct yy_buffer_state *YY_BUFFER_STATE;
+static inline void lexer_SetGfxDigits(char const *digits)
+{
+ gfxDigits = digits;
+}
-void setup_lexer(void);
+/*
+ * `path` is referenced, but not held onto..!
+ */
+struct LexerState *lexer_OpenFile(char const *path);
+struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo);
+void lexer_RestartRept(uint32_t lineNo);
+void lexer_DeleteState(struct LexerState *state);
+void lexer_Init(void);
-void yy_set_state(enum eLexerState i);
-YY_BUFFER_STATE yy_create_buffer(FILE *f);
-YY_BUFFER_STATE yy_scan_bytes(char const *mem, uint32_t size);
-void yy_delete_buffer(YY_BUFFER_STATE buf);
-void yy_switch_to_buffer(YY_BUFFER_STATE buf);
-uint32_t lex_FloatAlloc(const struct sLexFloat *tok);
-void lex_FloatAddRange(uint32_t id, uint16_t start, uint16_t end);
-void lex_FloatDeleteRange(uint32_t id, uint16_t start, uint16_t end);
-void lex_FloatAddFirstRange(uint32_t id, uint16_t start, uint16_t end);
-void lex_FloatDeleteFirstRange(uint32_t id, uint16_t start, uint16_t end);
-void lex_FloatAddSecondRange(uint32_t id, uint16_t start, uint16_t end);
-void lex_FloatDeleteSecondRange(uint32_t id, uint16_t start, uint16_t end);
-void lex_Init(void);
-void lex_AddStrings(const struct sLexInitString *lex);
-void lex_SetBuffer(char *buffer, uint32_t len);
-void lex_BeginStringExpansion(const char *tzName);
-int yywrap(void);
-int yylex(void);
-void yyunput(char c);
-void yyunputstr(const char *s);
-void yyskipbytes(uint32_t count);
-void yyunputbytes(uint32_t count);
+enum LexerMode {
+ LEXER_NORMAL, /* Mode the parser switches back to after macro or OPT arguments */
+ LEXER_RAW, /* Set by the parser while macro-invocation and OPT arguments are read */
+ LEXER_SKIP_TO_ELIF, /* Set by the parser after an IF/ELIF whose condition was false */
+ LEXER_SKIP_TO_ENDC /* Set by the parser at an ELIF/ELSE once an earlier branch has run */
+};
-extern YY_BUFFER_STATE pCurrentBuffer;
-extern struct sStringExpansionPos *pCurrentStringExpansion;
+void lexer_SetMode(enum LexerMode mode);
+void lexer_ToggleStringExpansion(bool enable);
-void upperstring(char *s);
-void lowerstring(char *s);
+char const *lexer_GetFileName(void);
+uint32_t lexer_GetLineNo(void);
+uint32_t lexer_GetColNo(void);
+void lexer_DumpStringExpansions(void);
+int yylex(void);
+void lexer_CaptureRept(char **capture, size_t *size);
+void lexer_CaptureMacroBody(char **capture, size_t *size);
#endif /* RGBDS_ASM_LEXER_H */
--- a/include/asm/macro.h
+++ b/include/asm/macro.h
@@ -28,6 +28,7 @@
uint32_t macro_GetUniqueID(void);
char const *macro_GetUniqueIDStr(void);
void macro_SetUniqueID(uint32_t id);
+uint32_t macro_UseNewUniqueID(void);
void macro_ShiftCurrentArgs(void);
uint32_t macro_NbArgs(void);
--- a/include/asm/main.h
+++ b/include/asm/main.h
@@ -43,6 +43,10 @@
void opt_Pop(void);
void opt_Parse(char *s);
+void upperstring(char *s);
+void lowerstring(char *s);
+
+/* TODO: are these really needed? */
#define YY_FATAL_ERROR fatalerror
#ifdef YYLMAX
--- a/include/asm/output.h
+++ b/include/asm/output.h
@@ -18,6 +18,8 @@
extern char *tzObjectname;
extern struct Section *pSectionList, *pCurrentSection;
+void out_RegisterNode(struct FileStackNode *node);
+void out_ReplaceNode(struct FileStackNode *node);
void out_SetFileName(char *s);
void out_CreatePatch(uint32_t type, struct Expression const *expr,
uint32_t ofs);
--- a/include/asm/symbol.h
+++ b/include/asm/symbol.h
@@ -35,18 +35,21 @@
bool isExported; /* Whether the symbol is to be exported */
bool isBuiltin; /* Whether the symbol is a built-in */
struct Section *section;
- char fileName[_MAX_PATH + 1]; /* File where the symbol was defined. */
- uint32_t fileLine; /* Line where the symbol was defined. */
+ struct FileStackNode *src; /* Where the symbol was defined */
+ uint32_t fileLine; /* Line where the symbol was defined */
+ bool hasCallback;
union {
- struct { /* If sym_IsNumeric */
- int32_t value;
- int32_t (*callback)(void);
- };
- struct { /* For SYM_MACRO */
- uint32_t macroSize;
+ /* If sym_IsNumeric */
+ int32_t value;
+ int32_t (*numCallback)(void);
+ /* For SYM_MACRO */
+ struct {
+ size_t macroSize;
char *macro;
};
+ /* For SYM_EQUS, TODO: separate "base" fields from SYM_MACRO */
+ char const *(*strCallback)(void); /* For SYM_EQUS */
};
uint32_t ID; /* ID of the symbol in the object file (-1 if none) */
@@ -101,6 +104,8 @@
*/
static inline char const *sym_GetStringValue(struct Symbol const *sym)
{
+ if (sym->hasCallback)
+ return sym->strCallback();
return sym->macro;
}
@@ -114,9 +119,10 @@
struct Symbol *sym_AddEqu(char const *symName, int32_t value);
struct Symbol *sym_AddSet(char const *symName, int32_t value);
uint32_t sym_GetPCValue(void);
+uint32_t sym_GetConstantSymValue(struct Symbol const *sym);
uint32_t sym_GetConstantValue(char const *s);
struct Symbol *sym_FindSymbol(char const *symName);
-struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo);
+struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size);
struct Symbol *sym_Ref(char const *symName);
struct Symbol *sym_AddString(char const *symName, char const *value);
uint32_t sym_GetDefinedValue(char const *s);
--- a/include/asm/util.h
+++ b/include/asm/util.h
@@ -12,6 +12,7 @@
#include <stdint.h>
uint32_t calchash(const char *s);
+char const *print(int c);
size_t readUTF8Char(uint8_t *dest, char const *src);
#endif /* RGBDS_UTIL_H */
--- a/include/link/main.h
+++ b/include/link/main.h
@@ -29,6 +29,25 @@
extern bool isWRA0Mode;
extern bool disablePadding;
+struct FileStackNode {
+ struct FileStackNode *parent; /* Enclosing node; presumably NULL at the root level (TODO confirm) */
+ /* Line at which the parent context was exited; meaningless for the root level */
+ uint32_t lineNo;
+
+ enum {
+ NODE_REPT,
+ NODE_FILE,
+ NODE_MACRO,
+ } type; /* Tells which member of the union below is in use */
+ union {
+ char *name; /* NODE_FILE, NODE_MACRO */
+ struct { /* NODE_REPT */
+ uint32_t reptDepth; /* presumably the number of entries in `iters` (TODO confirm) */
+ uint32_t *iters; /* presumably one iteration count per nested REPT level (TODO confirm) */
+ };
+ };
+};
+
/* Helper macro for printing verbose-mode messages */
#define verbosePrint(...) do { \
if (beVerbose) \
@@ -35,9 +54,20 @@
fprintf(stderr, __VA_ARGS__); \
} while (0)
-void error(char const *fmt, ...);
+/**
+ * Dump a file stack to stderr
+ * @param node The leaf node to dump the context of
+ */
+char const *dumpFileStack(struct FileStackNode const *node);
-noreturn_ void fatal(char const *fmt, ...);
+void warning(struct FileStackNode const *where, uint32_t lineNo,
+ char const *fmt, ...) format_(printf, 3, 4);
+
+void error(struct FileStackNode const *where, uint32_t lineNo,
+ char const *fmt, ...) format_(printf, 3, 4);
+
+noreturn_ void fatal(struct FileStackNode const *where, uint32_t lineNo,
+ char const *fmt, ...) format_(printf, 3, 4);
/**
* Opens a file if specified, and aborts on error.
--- a/include/link/object.h
+++ b/include/link/object.h
@@ -14,8 +14,9 @@
/**
* Read an object (.o) file, and add its info to the data structures.
* @param fileName A path to the object file to be read
+ * @param i The ID of the file
*/
-void obj_ReadFile(char const *fileName);
+void obj_ReadFile(char const *fileName, unsigned int i);
/**
* Perform validation on the object files' contents
@@ -26,6 +27,12 @@
* Evaluate all assertions
*/
void obj_CheckAssertions(void);
+
+/**
+ * Sets up object file reading
+ * @param nbFiles The number of object files that will be read
+ */
+void obj_Setup(unsigned int nbFiles);
/**
* `free`s all object memory that was allocated.
--- a/include/link/section.h
+++ b/include/link/section.h
@@ -19,6 +19,7 @@
#include "linkdefs.h"
+struct FileStackNode;
struct Section;
struct AttachedSymbol {
@@ -27,7 +28,8 @@
};
struct Patch {
- char *fileName;
+ struct FileStackNode const *src;
+ uint32_t lineNo;
int32_t offset;
uint32_t pcSectionID;
uint32_t pcOffset;
--- a/include/link/symbol.h
+++ b/include/link/symbol.h
@@ -16,12 +16,14 @@
#include "linkdefs.h"
+struct FileStackNode;
+
struct Symbol {
/* Info contained in the object files */
char *name;
enum ExportLevel type;
char const *objFileName;
- char *fileName;
+ struct FileStackNode const *src;
int32_t lineNo;
int32_t sectionID;
union {
--- a/include/linkdefs.h
+++ b/include/linkdefs.h
@@ -14,7 +14,7 @@
#define RGBDS_OBJECT_VERSION_STRING "RGB%1u"
#define RGBDS_OBJECT_VERSION_NUMBER 9U
-#define RGBDS_OBJECT_REV 5U
+#define RGBDS_OBJECT_REV 6U
enum AssertionType {
ASSERT_WARN,
--- a/include/platform.h
+++ b/include/platform.h
@@ -32,4 +32,11 @@
# define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
#endif
+/* MSVC doesn't use POSIX types or defines for `read` */
+#ifdef _MSC_VER
+# define STDIN_FILENO 0
+# define ssize_t int
+# define SSIZE_MAX INT_MAX
+#endif
+
#endif /* RGBDS_PLATFORM_H */
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -31,7 +31,6 @@
"${BISON_ASMy_OUTPUT_SOURCE}"
"asm/charmap.c"
"asm/fstack.c"
- "asm/globlex.c"
"asm/lexer.c"
"asm/macro.c"
"asm/main.c"
--- a/src/asm/asmy.y
+++ b/src/asm/asmy.y
@@ -39,63 +39,8 @@
char *tzNewMacro;
uint32_t ulNewMacroSize;
int32_t nPCOffset;
+bool executeElseBlock; /* True while no branch of the current IF chain has been taken, i.e. a later ELIF/ELSE may still run; when false, ELIF and ELSE skip to ENDC */
-size_t symvaluetostring(char *dest, size_t maxLength, char *symName,
- const char *mode)
-{
- size_t length;
- struct Symbol *sym = sym_FindSymbol(symName);
-
- if (sym && sym->type == SYM_EQUS) {
- char const *src = sym_GetStringValue(sym);
- size_t i;
-
- if (mode)
- error("Print types are only allowed for numbers\n");
-
- for (i = 0; src[i] != 0; i++) {
- if (i >= maxLength)
- fatalerror("Symbol value too long to fit buffer\n");
-
- dest[i] = src[i];
- }
-
- length = i;
-
- } else {
- uint32_t value = sym_GetConstantValue(symName);
- int32_t fullLength;
-
- /* Special cheat for binary */
- if (mode && !mode[0]) {
- char binary[33]; /* 32 bits + 1 terminator */
- char *write_ptr = binary + 32;
- fullLength = 0;
- binary[32] = 0;
- do {
- *(--write_ptr) = (value & 1) + '0';
- value >>= 1;
- fullLength++;
- } while(value);
- strncpy(dest, write_ptr, maxLength + 1);
- } else {
- fullLength = snprintf(dest, maxLength + 1,
- mode ? mode : "$%" PRIX32,
- value);
- }
-
- if (fullLength < 0) {
- fatalerror("snprintf encoding error\n");
- } else {
- length = (size_t)fullLength;
- if (length > maxLength)
- fatalerror("Symbol value too long to fit buffer\n");
- }
- }
-
- return length;
-}
-
static uint32_t str2int2(uint8_t *s, int32_t length)
{
int32_t i;
@@ -111,278 +56,6 @@
return r;
}
-static uint32_t isWhiteSpace(char s)
-{
- return (s == ' ') || (s == '\t') || (s == '\0') || (s == '\n');
-}
-
-static uint32_t isRept(char *s)
-{
- return (strncasecmp(s, "REPT", 4) == 0)
- && isWhiteSpace(*(s - 1)) && isWhiteSpace(s[4]);
-}
-
-static uint32_t isEndr(char *s)
-{
- return (strncasecmp(s, "ENDR", 4) == 0)
- && isWhiteSpace(*(s - 1)) && isWhiteSpace(s[4]);
-}
-
-static void copyrept(void)
-{
- int32_t level = 1, len, instring = 0;
- char *src = pCurrentBuffer->pBuffer;
- char *bufferEnd = pCurrentBuffer->pBufferStart
- + pCurrentBuffer->nBufferSize;
-
- while (src < bufferEnd && level) {
- if (instring == 0) {
- if (isRept(src)) {
- level++;
- src += 4;
- } else if (isEndr(src)) {
- level--;
- src += 4;
- } else {
- if (*src == '\"')
- instring = 1;
- src++;
- }
- } else {
- if (*src == '\\') {
- src += 2;
- } else if (*src == '\"') {
- src++;
- instring = 0;
- } else {
- src++;
- }
- }
- }
-
- if (level != 0)
- fatalerror("Unterminated REPT block\n");
-
- len = src - pCurrentBuffer->pBuffer - 4;
-
- src = pCurrentBuffer->pBuffer;
- ulNewMacroSize = len;
-
- tzNewMacro = malloc(ulNewMacroSize + 1);
-
- if (tzNewMacro == NULL)
- fatalerror("Not enough memory for REPT block.\n");
-
- uint32_t i;
-
- tzNewMacro[ulNewMacroSize] = 0;
- for (i = 0; i < ulNewMacroSize; i++) {
- tzNewMacro[i] = src[i];
- if (src[i] == '\n')
- nLineNo++;
- }
-
- yyskipbytes(ulNewMacroSize + 4);
-
-}
-
-static uint32_t isMacro(char *s)
-{
- return (strncasecmp(s, "MACRO", 4) == 0)
- && isWhiteSpace(*(s - 1)) && isWhiteSpace(s[5]);
-}
-
-static uint32_t isEndm(char *s)
-{
- return (strncasecmp(s, "ENDM", 4) == 0)
- && isWhiteSpace(*(s - 1)) && isWhiteSpace(s[4]);
-}
-
-static void copymacro(void)
-{
- int32_t level = 1, len, instring = 0;
- char *src = pCurrentBuffer->pBuffer;
- char *bufferEnd = pCurrentBuffer->pBufferStart
- + pCurrentBuffer->nBufferSize;
-
- while (src < bufferEnd && level) {
- if (instring == 0) {
- if (isMacro(src)) {
- level++;
- src += 4;
- } else if (isEndm(src)) {
- level--;
- src += 4;
- } else {
- if(*src == '\"')
- instring = 1;
- src++;
- }
- } else {
- if (*src == '\\') {
- src += 2;
- } else if (*src == '\"') {
- src++;
- instring = 0;
- } else {
- src++;
- }
- }
- }
-
- if (level != 0)
- fatalerror("Unterminated MACRO definition.\n");
-
- len = src - pCurrentBuffer->pBuffer - 4;
-
- src = pCurrentBuffer->pBuffer;
- ulNewMacroSize = len;
-
- tzNewMacro = (char *)malloc(ulNewMacroSize + 1);
- if (tzNewMacro == NULL)
- fatalerror("Not enough memory for MACRO definition.\n");
-
- uint32_t i;
-
- tzNewMacro[ulNewMacroSize] = 0;
- for (i = 0; i < ulNewMacroSize; i++) {
- tzNewMacro[i] = src[i];
- if (src[i] == '\n')
- nLineNo++;
- }
-
- yyskipbytes(ulNewMacroSize + 4);
-}
-
-static bool endsIf(char c)
-{
- return isWhiteSpace(c) || c == '(' || c == '{';
-}
-
-static uint32_t isIf(char *s)
-{
- return (strncasecmp(s, "IF", 2) == 0)
- && isWhiteSpace(s[-1]) && endsIf(s[2]);
-}
-
-static uint32_t isElif(char *s)
-{
- return (strncasecmp(s, "ELIF", 4) == 0)
- && isWhiteSpace(s[-1]) && endsIf(s[4]);
-}
-
-static uint32_t isElse(char *s)
-{
- return (strncasecmp(s, "ELSE", 4) == 0)
- && isWhiteSpace(s[-1]) && isWhiteSpace(s[4]);
-}
-
-static uint32_t isEndc(char *s)
-{
- return (strncasecmp(s, "ENDC", 4) == 0)
- && isWhiteSpace(s[-1]) && isWhiteSpace(s[4]);
-}
-
-static void if_skip_to_else(void)
-{
- int32_t level = 1;
- bool inString = false;
- char *src = pCurrentBuffer->pBuffer;
-
- while (*src && level) {
- if (*src == '\n')
- nLineNo++;
-
- if (!inString) {
- if (isIf(src)) {
- level++;
- src += 2;
-
- } else if (level == 1 && isElif(src)) {
- level--;
- skipElif = false;
-
- } else if (level == 1 && isElse(src)) {
- level--;
- src += 4;
-
- } else if (isEndc(src)) {
- level--;
- if (level != 0)
- src += 4;
-
- } else {
- if (*src == '\"')
- inString = true;
- src++;
- }
- } else {
- if (*src == '\"') {
- inString = false;
- } else if (*src == '\\') {
- /* Escaped quotes don't end the string */
- if (*++src != '\"')
- src--;
- }
- src++;
- }
- }
-
- if (level != 0)
- fatalerror("Unterminated IF construct\n");
-
- int32_t len = src - pCurrentBuffer->pBuffer;
-
- yyskipbytes(len);
- yyunput('\n');
- nLineNo--;
-}
-
-static void if_skip_to_endc(void)
-{
- int32_t level = 1;
- bool inString = false;
- char *src = pCurrentBuffer->pBuffer;
-
- while (*src && level) {
- if (*src == '\n')
- nLineNo++;
-
- if (!inString) {
- if (isIf(src)) {
- level++;
- src += 2;
- } else if (isEndc(src)) {
- level--;
- if (level != 0)
- src += 4;
- } else {
- if (*src == '\"')
- inString = true;
- src++;
- }
- } else {
- if (*src == '\"') {
- inString = false;
- } else if (*src == '\\') {
- /* Escaped quotes don't end the string */
- if (*++src != '\"')
- src--;
- }
- src++;
- }
- }
-
- if (level != 0)
- fatalerror("Unterminated IF construct\n");
-
- int32_t len = src - pCurrentBuffer->pBuffer;
-
- yyskipbytes(len);
- yyunput('\n');
- nLineNo--;
-}
-
static size_t strlenUTF8(const char *s)
{
size_t len = 0;
@@ -659,19 +332,72 @@
| lines {
nListCountEmpty = 0;
nPCOffset = 0;
- } line '\n' {
- nLineNo++;
+ } line {
nTotalLines++;
}
;
-line : label
- | label cpu_command
- | label macro
- | label simple_pseudoop
- | pseudoop
+line : label '\n'
+ | label cpu_command '\n'
+ | label macro '\n'
+ | label simple_pseudoop '\n'
+ | pseudoop '\n'
+ | conditional /* May not necessarily be followed by a newline, see below */
;
+/*
+ * For "logistical" reasons, conditionals must manage newlines themselves.
+ * This is because we need to switch the lexer's mode *after* the newline has been read,
+ * and to avoid causing some grammar conflicts (token reducing is finicky).
+ * This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care.
+ */
+conditional : if
+ /* It's important that all of these require being at line start for `skipIfBlock` */
+ | elif
+ | else
+ | endc
+;
+
+if : T_POP_IF const '\n' {
+ nIFDepth++;
+ executeElseBlock = !$2; /* A false condition leaves a later ELIF/ELSE eligible to run */
+ if (executeElseBlock)
+ lexer_SetMode(LEXER_SKIP_TO_ELIF); /* Condition false: switch the lexer to its skip-to-ELIF mode */
+ }
+;
+
+elif : T_POP_ELIF const '\n' {
+ if (nIFDepth <= 0)
+ fatalerror("Found ELIF outside an IF construct\n");
+
+ if (!executeElseBlock) { /* Flag clear: do not evaluate this ELIF, go straight to ENDC */
+ lexer_SetMode(LEXER_SKIP_TO_ENDC);
+ } else { /* Flag set: this ELIF's own condition decides */
+ executeElseBlock = !$2;
+ if (executeElseBlock)
+ lexer_SetMode(LEXER_SKIP_TO_ELIF); /* Condition false again: keep skipping */
+ }
+ }
+;
+
+else : T_POP_ELSE '\n' {
+ if (nIFDepth <= 0)
+ fatalerror("Found ELSE outside an IF construct\n");
+
+ if (!executeElseBlock) /* Flag clear: skip the ELSE body; otherwise the lexer mode is left unchanged */
+ lexer_SetMode(LEXER_SKIP_TO_ENDC);
+ }
+;
+
+endc : T_POP_ENDC '\n' {
+ if (nIFDepth <= 0)
+ fatalerror("Found ENDC outside an IF construct\n");
+
+ nIFDepth--;
+ executeElseBlock = false; /* With the flag clear, a following ELIF/ELSE (of an enclosing IF) skips to ENDC */
+ }
+;
+
scoped_id : T_ID | T_LOCAL_ID ;
label : /* empty */
@@ -699,9 +425,9 @@
;
macro : T_ID {
- yy_set_state(LEX_STATE_MACROARGS);
+ lexer_SetMode(LEXER_RAW);
} macroargs {
- yy_set_state(LEX_STATE_NORMAL);
+ lexer_SetMode(LEXER_NORMAL);
fstk_RunMacro($1, $3);
}
;
@@ -732,10 +458,6 @@
| printt
| printv
| printi
- | if
- | elif
- | else
- | endc
| export
| db
| dw
@@ -786,9 +508,9 @@
;
opt : T_POP_OPT {
- yy_set_state(LEX_STATE_MACROARGS);
+ lexer_SetMode(LEXER_RAW);
} opt_list {
- yy_set_state(LEX_STATE_NORMAL);
+ lexer_SetMode(LEXER_NORMAL);
}
;
@@ -875,16 +597,20 @@
;
rept : T_POP_REPT uconst {
- uint32_t nDefinitionLineNo = nLineNo;
- copyrept();
- fstk_RunRept($2, nDefinitionLineNo);
+ uint32_t nDefinitionLineNo = lexer_GetLineNo();
+ char *body;
+ size_t size;
+ lexer_CaptureRept(&body, &size);
+ fstk_RunRept($2, nDefinitionLineNo, body, size);
}
;
macrodef : T_LABEL ':' T_POP_MACRO {
- int32_t nDefinitionLineNo = nLineNo;
- copymacro();
- sym_AddMacro($1, nDefinitionLineNo);
+ int32_t nDefinitionLineNo = lexer_GetLineNo();
+ char *body;
+ size_t size;
+ lexer_CaptureMacroBody(&body, &size);
+ sym_AddMacro($1, nDefinitionLineNo, body, size);
}
;
@@ -956,9 +682,9 @@
;
purge : T_POP_PURGE {
- oDontExpandStrings = true;
+ lexer_ToggleStringExpansion(false);
} purge_list {
- oDontExpandStrings = false;
+ lexer_ToggleStringExpansion(true);
}
;
@@ -1052,62 +778,6 @@
printf : T_POP_PRINTF const { math_Print($2); }
;
-if : T_POP_IF const {
- nIFDepth++;
- if (!$2)
- if_skip_to_else();
- }
-;
-
-elif : T_POP_ELIF const {
- if (nIFDepth <= 0)
- fatalerror("Found ELIF outside an IF construct\n");
-
- if (skipElif) {
- /*
- * Executed when ELIF is reached at the end of
- * an IF or ELIF block for which the condition
- * was true.
- *
- * Continue parsing at ENDC keyword
- */
- if_skip_to_endc();
- } else {
- /*
- * Executed when ELIF is skipped to because the
- * condition of the previous IF or ELIF block
- * was false.
- */
- skipElif = true;
-
- if (!$2) {
- /*
- * Continue parsing after ELSE, or at
- * ELIF or ENDC keyword.
- */
- if_skip_to_else();
- }
- }
- }
-;
-
-else : T_POP_ELSE {
- if (nIFDepth <= 0)
- fatalerror("Found ELSE outside an IF construct\n");
-
- /* Continue parsing at ENDC keyword */
- if_skip_to_endc();
- }
-;
-
-endc : T_POP_ENDC {
- if (nIFDepth <= 0)
- fatalerror("Found ENDC outside an IF construct\n");
-
- nIFDepth--;
- }
-;
-
const_3bit : const {
int32_t value = $1;
@@ -1267,13 +937,13 @@
}
| T_OP_BANK '(' string ')' { rpn_BankSection(&$$, $3); }
| T_OP_DEF {
- oDontExpandStrings = true;
+ lexer_ToggleStringExpansion(false);
} '(' scoped_id ')' {
struct Symbol const *sym = sym_FindSymbol($4);
rpn_Number(&$$, !!sym);
- oDontExpandStrings = false;
+ lexer_ToggleStringExpansion(true);
}
| T_OP_ROUND '(' const ')' {
rpn_Number(&$$, math_Round($3));
--- a/src/asm/fstack.c
+++ b/src/asm/fstack.c
@@ -6,554 +6,470 @@
* SPDX-License-Identifier: MIT
*/
-/*
- * FileStack routines
- */
-
+#include <sys/stat.h>
+#include <assert.h>
#include <errno.h>
#include <inttypes.h>
-#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
#include "asm/fstack.h"
-#include "asm/lexer.h"
#include "asm/macro.h"
#include "asm/main.h"
-#include "asm/output.h"
+#include "asm/symbol.h"
#include "asm/warning.h"
+#include "platform.h" /* S_ISDIR (stat macro) */
-#include "extern/err.h"
+#ifdef LEXER_DEBUG
+ #define dbgPrint(...) fprintf(stderr, "[lexer] " __VA_ARGS__)
+#else
+ #define dbgPrint(...)
+#endif
-#include "platform.h" // S_ISDIR (stat macro)
-#include "types.h"
+struct Context {
+ struct Context *parent;
+ struct FileStackNode *fileInfo;
+ struct LexerState *lexerState;
+ uint32_t uniqueID;
+ struct MacroArgs *macroArgs; /* Macro args are *saved* here */
+ uint32_t nbReptIters;
+};
-static struct sContext *pFileStack;
-static unsigned int nFileStackDepth;
-unsigned int nMaxRecursionDepth;
-static struct Symbol const *pCurrentMacro;
-static YY_BUFFER_STATE CurrentFlexHandle;
-static FILE *pCurrentFile;
-static uint32_t nCurrentStatus;
-char tzCurrentFileName[_MAX_PATH + 1];
-static char IncludePaths[MAXINCPATHS][_MAX_PATH + 1];
-static int32_t NextIncPath;
-static uint32_t nMacroCount;
+static struct Context *contextStack;
+static size_t contextDepth = 0;
+#define DEFAULT_MAX_DEPTH 64
+size_t nMaxRecursionDepth;
-static char *pCurrentREPTBlock;
-static uint32_t nCurrentREPTBlockSize;
-static uint32_t nCurrentREPTBlockCount;
-static int32_t nCurrentREPTBodyFirstLine;
-static int32_t nCurrentREPTBodyLastLine;
+static unsigned int nbIncPaths = 0;
+static char const *includePaths[MAXINCPATHS];
-uint32_t ulMacroReturnValue;
+char const *dumpNodeAndParents(struct FileStackNode const *node)
+{
+ char const *name;
-/*
- * defines for nCurrentStatus
- */
-#define STAT_isInclude 0 /* 'Normal' state as well */
-#define STAT_isMacro 1
-#define STAT_isMacroArg 2
-#define STAT_isREPTBlock 3
+ if (node->type == NODE_REPT) {
+ assert(node->parent); /* REPT nodes should always have a parent */
+ struct FileStackReptNode const *reptInfo = (struct FileStackReptNode const *)node;
-/* Max context stack size */
+ name = dumpNodeAndParents(node->parent);
+ fprintf(stderr, "(%" PRIu32 ") -> %s", node->lineNo, name);
+ for (uint32_t i = reptInfo->reptDepth; i--; )
+ fprintf(stderr, "::REPT~%" PRIu32, reptInfo->iters[i]);
+ } else {
+ name = ((struct FileStackNamedNode const *)node)->name;
+ if (node->parent) {
+ dumpNodeAndParents(node->parent);
+ fprintf(stderr, "(%" PRIu32 ") -> %s", node->lineNo, name);
+ } else {
+ fputs(name, stderr);
+ }
+ }
+ return name;
+}
-/*
- * Context push and pop
- */
-static void pushcontext(void)
+void fstk_Dump(struct FileStackNode const *node, uint32_t lineNo)
{
- struct sContext **ppFileStack;
+ dumpNodeAndParents(node);
+ fprintf(stderr, "(%" PRIu32 ")", lineNo);
+}
- if (++nFileStackDepth > nMaxRecursionDepth)
- fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth);
+void fstk_DumpCurrent(void)
+{
+ if (!contextStack) {
+ fputs("at top level", stderr);
+ return;
+ }
+ fstk_Dump(contextStack->fileInfo, lexer_GetLineNo());
+}
- ppFileStack = &pFileStack;
- while (*ppFileStack)
- ppFileStack = &((*ppFileStack)->next);
+struct FileStackNode *fstk_GetFileStack(void)
+{
+ struct FileStackNode *node = contextStack->fileInfo;
- *ppFileStack = malloc(sizeof(struct sContext));
-
- if (*ppFileStack == NULL)
- fatalerror("No memory for context\n");
-
- (*ppFileStack)->FlexHandle = CurrentFlexHandle;
- (*ppFileStack)->next = NULL;
- strcpy((char *)(*ppFileStack)->tzFileName, (char *)tzCurrentFileName);
- (*ppFileStack)->nLine = nLineNo;
-
- switch ((*ppFileStack)->nStatus = nCurrentStatus) {
- case STAT_isMacroArg:
- case STAT_isMacro:
- (*ppFileStack)->macroArgs = macro_GetCurrentArgs();
- (*ppFileStack)->pMacro = pCurrentMacro;
- break;
- case STAT_isInclude:
- (*ppFileStack)->pFile = pCurrentFile;
- break;
- case STAT_isREPTBlock:
- (*ppFileStack)->macroArgs = macro_GetCurrentArgs();
- (*ppFileStack)->pREPTBlock = pCurrentREPTBlock;
- (*ppFileStack)->nREPTBlockSize = nCurrentREPTBlockSize;
- (*ppFileStack)->nREPTBlockCount = nCurrentREPTBlockCount;
- (*ppFileStack)->nREPTBodyFirstLine = nCurrentREPTBodyFirstLine;
- (*ppFileStack)->nREPTBodyLastLine = nCurrentREPTBodyLastLine;
- break;
- default:
- fatalerror("%s: Internal error.\n", __func__);
+ /* Mark node and all of its parents as referenced if not already so they don't get freed */
+ while (node && !node->referenced) {
+ node->ID = -1;
+ node->referenced = true;
+ node = node->parent;
}
- (*ppFileStack)->uniqueID = macro_GetUniqueID();
-
- nLineNo = 0;
+ return contextStack->fileInfo;
}
-static int32_t popcontext(void)
+char const *fstk_GetFileName(void)
{
- struct sContext *pLastFile, **ppLastFile;
+ /* Iterating via the nodes themselves skips nested REPTs */
+ struct FileStackNode const *node = contextStack->fileInfo;
- if (nCurrentStatus == STAT_isREPTBlock) {
- if (--nCurrentREPTBlockCount) {
- char *pREPTIterationWritePtr;
- unsigned long nREPTIterationNo;
- int nNbCharsWritten;
- int nNbCharsLeft;
+ while (node->type != NODE_FILE)
+ node = node->parent;
+ return ((struct FileStackNamedNode const *)node)->name;
+}
- yy_delete_buffer(CurrentFlexHandle);
- CurrentFlexHandle =
- yy_scan_bytes(pCurrentREPTBlock,
- nCurrentREPTBlockSize);
- yy_switch_to_buffer(CurrentFlexHandle);
- macro_SetUniqueID(nMacroCount++);
-
- /* Increment REPT count in file path */
- pREPTIterationWritePtr =
- strrchr(tzCurrentFileName, '~') + 1;
- nREPTIterationNo =
- strtoul(pREPTIterationWritePtr, NULL, 10);
- nNbCharsLeft = sizeof(tzCurrentFileName)
- - (pREPTIterationWritePtr - tzCurrentFileName);
- nNbCharsWritten = snprintf(pREPTIterationWritePtr,
- nNbCharsLeft, "%lu",
- nREPTIterationNo + 1);
- if (nNbCharsWritten >= nNbCharsLeft) {
- /*
- * The string is probably corrupted somehow,
- * revert the change to avoid a bad error
- * output.
- */
- sprintf(pREPTIterationWritePtr, "%lu",
- nREPTIterationNo);
- fatalerror("Cannot write REPT count to file path\n");
- }
-
- nLineNo = nCurrentREPTBodyFirstLine;
- return 0;
- }
+void fstk_AddIncludePath(char const *path)
+{
+ if (path[0] == '\0')
+ return;
+ if (nbIncPaths >= MAXINCPATHS) {
+ error("Too many include directories passed from command line\n");
+ return;
}
+ size_t len = strlen(path);
+ size_t allocSize = len + (path[len - 1] != '/') + 1;
+ char *str = malloc(allocSize);
- pLastFile = pFileStack;
- if (pLastFile == NULL)
- return 1;
-
- ppLastFile = &pFileStack;
- while (pLastFile->next) {
- ppLastFile = &(pLastFile->next);
- pLastFile = *ppLastFile;
+ if (!str) {
+ /* Attempt to continue without that path */
+ error("Failed to allocate new include path: %s\n", strerror(errno));
+ return;
}
+ memcpy(str, path, len);
+ char *end = str + len - 1;
- yy_delete_buffer(CurrentFlexHandle);
- nLineNo = nCurrentStatus == STAT_isREPTBlock ? nCurrentREPTBodyLastLine
- : pLastFile->nLine;
+ if (*end++ != '/')
+ *end++ = '/';
+ *end = '\0';
+ includePaths[nbIncPaths++] = str;
+}
- if (nCurrentStatus == STAT_isInclude)
- fclose(pCurrentFile);
-
- if (nCurrentStatus == STAT_isMacro
- || nCurrentStatus == STAT_isREPTBlock)
- nLineNo++;
-
- CurrentFlexHandle = pLastFile->FlexHandle;
- strcpy((char *)tzCurrentFileName, (char *)pLastFile->tzFileName);
-
- switch (pLastFile->nStatus) {
- struct MacroArgs *args;
-
- case STAT_isMacroArg:
- case STAT_isMacro:
- args = macro_GetCurrentArgs();
- if (nCurrentStatus == STAT_isMacro) {
- macro_FreeArgs(args);
- free(args);
- }
- macro_UseNewArgs(pLastFile->macroArgs);
- pCurrentMacro = pLastFile->pMacro;
- break;
- case STAT_isInclude:
- pCurrentFile = pLastFile->pFile;
- break;
- case STAT_isREPTBlock:
- args = macro_GetCurrentArgs();
- if (nCurrentStatus == STAT_isMacro) {
- macro_FreeArgs(args);
- free(args);
- }
- macro_UseNewArgs(pLastFile->macroArgs);
- pCurrentREPTBlock = pLastFile->pREPTBlock;
- nCurrentREPTBlockSize = pLastFile->nREPTBlockSize;
- nCurrentREPTBlockCount = pLastFile->nREPTBlockCount;
- nCurrentREPTBodyFirstLine = pLastFile->nREPTBodyFirstLine;
- break;
- default:
- fatalerror("%s: Internal error.\n", __func__);
+static void printDep(char const *path)
+{
+ if (dependfile) {
+ fprintf(dependfile, "%s: %s\n", tzTargetFileName, path);
+ if (oGeneratePhonyDeps)
+ fprintf(dependfile, "%s:\n", path);
}
- macro_SetUniqueID(pLastFile->uniqueID);
+}
- nCurrentStatus = pLastFile->nStatus;
+static bool isPathValid(char const *path)
+{
+ struct stat statbuf;
- nFileStackDepth--;
+ if (stat(path, &statbuf) != 0)
+ return false;
- free(*ppLastFile);
- *ppLastFile = NULL;
- yy_switch_to_buffer(CurrentFlexHandle);
- return 0;
+ /* Reject directories */
+ return !S_ISDIR(statbuf.st_mode);
}
-int32_t fstk_GetLine(void)
+bool fstk_FindFile(char const *path, char **fullPath, size_t *size)
{
- struct sContext *pLastFile, **ppLastFile;
-
- switch (nCurrentStatus) {
- case STAT_isInclude:
- /* This is the normal mode, also used when including a file. */
- return nLineNo;
- case STAT_isMacro:
- break; /* Peek top file of the stack */
- case STAT_isMacroArg:
- return nLineNo; /* ??? */
- case STAT_isREPTBlock:
- break; /* Peek top file of the stack */
- default:
- fatalerror("%s: Internal error.\n", __func__);
+ if (!*size) {
+ *size = 64; /* This is arbitrary, really */
+ *fullPath = realloc(*fullPath, *size);
+ if (!*fullPath)
+ error("realloc error during include path search: %s\n",
+ strerror(errno));
}
- pLastFile = pFileStack;
+ if (*fullPath) {
+ for (size_t i = 0; i <= nbIncPaths; ++i) {
+ char const *incPath = i ? includePaths[i - 1] : "";
+ int len = snprintf(*fullPath, *size, "%s%s", incPath, path);
- if (pLastFile != NULL) {
- while (pLastFile->next) {
- ppLastFile = &(pLastFile->next);
- pLastFile = *ppLastFile;
+ /* Oh how I wish `asnprintf` was standard... */
+ if (len >= *size) { /* `len` doesn't include the terminator, `size` does */
+ *size = len + 1;
+ *fullPath = realloc(*fullPath, *size);
+ if (!*fullPath) {
+ error("realloc error during include path search: %s\n",
+ strerror(errno));
+ break;
+ }
+ len = sprintf(*fullPath, "%s%s", incPath, path);
+ }
+
+ if (len < 0) {
+ error("snprintf error during include path search: %s\n",
+ strerror(errno));
+ } else if (isPathValid(*fullPath)) {
+ printDep(*fullPath);
+ return true;
+ }
}
- return pLastFile->nLine;
}
- /*
- * This is only reached if the lexer is in REPT or MACRO mode but there
- * are no saved contexts with the origin of said REPT or MACRO.
- */
- fatalerror("%s: Internal error.\n", __func__);
+ errno = ENOENT;
+ if (oGeneratedMissingIncludes)
+ printDep(path);
+ return false;
}
-int yywrap(void)
+bool yywrap(void)
{
- return popcontext();
-}
+ if (contextStack->fileInfo->type == NODE_REPT) { /* The context is a REPT block, which may loop */
+ struct FileStackReptNode *fileInfo = (struct FileStackReptNode *)contextStack->fileInfo;
-/*
- * Dump the context stack to stderr
- */
-void fstk_Dump(void)
-{
- const struct sContext *pLastFile;
+ /* If the node is referenced, we can't edit it; duplicate it */
+ if (contextStack->fileInfo->referenced) {
+ size_t size = sizeof(*fileInfo) + sizeof(fileInfo->iters[0]) * fileInfo->reptDepth;
+ struct FileStackReptNode *copy = malloc(size);
- pLastFile = pFileStack;
+ if (!copy)
+ fatalerror("Failed to duplicate REPT file node: %s\n", strerror(errno));
+ /* Copy all info but the referencing */
+ memcpy(copy, fileInfo, size);
+ copy->node.next = NULL;
+ copy->node.referenced = false;
- while (pLastFile) {
- fprintf(stderr, "%s(%" PRId32 ") -> ", pLastFile->tzFileName,
- pLastFile->nLine);
- pLastFile = pLastFile->next;
+ fileInfo = copy;
+ contextStack->fileInfo = (struct FileStackNode *)fileInfo;
+ }
+
+ fileInfo->iters[0]++;
+ /* If this wasn't the last iteration, wrap instead of popping */
+ if (fileInfo->iters[0] <= contextStack->nbReptIters) {
+ lexer_RestartRept(contextStack->fileInfo->lineNo);
+ contextStack->uniqueID = macro_UseNewUniqueID();
+ return false;
+ }
+ } else if (!contextStack->parent) {
+ return true;
}
+ dbgPrint("Popping context\n");
- fprintf(stderr, "%s(%" PRId32 ")", tzCurrentFileName, nLineNo);
-}
+ struct Context *context = contextStack;
-void fstk_DumpToStr(char *buf, size_t buflen)
-{
- const struct sContext *pLastFile = pFileStack;
- int retcode;
- size_t len = buflen;
+ contextStack = contextStack->parent;
+ contextDepth--;
- while (pLastFile) {
- retcode = snprintf(&buf[buflen - len], len, "%s(%" PRId32 ") -> ",
- pLastFile->tzFileName, pLastFile->nLine);
- if (retcode < 0)
- fatalerror("Failed to dump file stack to string: %s\n", strerror(errno));
- else if (retcode >= len)
- len = 0;
- else
- len -= retcode;
- pLastFile = pLastFile->next;
+ lexer_DeleteState(context->lexerState);
+ /* Restore args if a macro (not REPT) saved them */
+ if (context->fileInfo->type == NODE_MACRO) {
+ dbgPrint("Restoring macro args %p\n", contextStack->macroArgs);
+ macro_UseNewArgs(contextStack->macroArgs);
}
+ /* Free the file stack node */
+ if (!context->fileInfo->referenced)
+ free(context->fileInfo);
+ /* Free the entry and make its parent the current entry */
+ free(context);
- retcode = snprintf(&buf[buflen - len], len, "%s(%" PRId32 ")",
- tzCurrentFileName, nLineNo);
- if (retcode < 0)
- fatalerror("Failed to dump file stack to string: %s\n", strerror(errno));
- else if (retcode >= len)
- len = 0;
- else
- len -= retcode;
-
- if (!len)
- warning(WARNING_LONG_STR, "File stack dump too long, got truncated\n");
+ lexer_SetState(contextStack->lexerState);
+ macro_SetUniqueID(contextStack->uniqueID);
+ return false;
}
/*
- * Dump the string expansion stack to stderr
+ * Make sure not to switch the lexer state before calling this, so the saved line no is correct
+ * BE CAREFUL!! This modifies the file stack directly, you should have set up the file info first
*/
-void fstk_DumpStringExpansions(void)
+static void newContext(struct FileStackNode *fileInfo)
{
- const struct sStringExpansionPos *pExpansion = pCurrentStringExpansion;
+ if (++contextDepth >= nMaxRecursionDepth)
+ fatalerror("Recursion limit (%zu) exceeded\n", nMaxRecursionDepth);
+ struct Context *context = malloc(sizeof(*context));
- while (pExpansion) {
- fprintf(stderr, "while expanding symbol \"%s\"\n",
- pExpansion->tzName);
- pExpansion = pExpansion->pParent;
- }
+ if (!context)
+ fatalerror("Failed to allocate memory for new context: %s\n", strerror(errno));
+ fileInfo->parent = contextStack->fileInfo;
+ fileInfo->lineNo = 0; /* Init to a default value, see struct definition for info */
+ fileInfo->referenced = false;
+ fileInfo->lineNo = lexer_GetLineNo();
+ context->fileInfo = fileInfo;
+ /*
+ * Link new entry to its parent so it's reachable later
+ * ERRORS SHOULD NOT OCCUR AFTER THIS!!
+ */
+ context->parent = contextStack;
+ contextStack = context;
+
}
-/*
- * Extra includepath stuff
- */
-void fstk_AddIncludePath(char *s)
+void fstk_RunInclude(char const *path)
{
- if (NextIncPath == MAXINCPATHS)
- fatalerror("Too many include directories passed from command line\n");
+ dbgPrint("Including path \"%s\"\n", path);
- // Find last occurrence of slash; is it at the end of the string?
- char const *lastSlash = strrchr(s, '/');
- char const *pattern = lastSlash && *(lastSlash + 1) == 0 ? "%s" : "%s/";
+ char *fullPath = NULL;
+ size_t size = 0;
- if (snprintf(IncludePaths[NextIncPath++], _MAX_PATH, pattern,
- s) >= _MAX_PATH)
- fatalerror("Include path too long '%s'\n", s);
-}
-
-static void printdep(const char *fileName)
-{
- if (dependfile) {
- fprintf(dependfile, "%s: %s\n", tzTargetFileName, fileName);
- if (oGeneratePhonyDeps)
- fprintf(dependfile, "%s:\n", fileName);
+ if (!fstk_FindFile(path, &fullPath, &size)) {
+ free(fullPath);
+ if (oGeneratedMissingIncludes)
+ oFailedOnMissingInclude = true;
+ else
+ error("Unable to open included file '%s': %s\n", path, strerror(errno));
+ return;
}
-}
+ dbgPrint("Full path: \"%s\"\n", fullPath);
-static FILE *getFile(char const *pathname)
-{
- struct stat statbuf;
+ struct FileStackNamedNode *fileInfo = malloc(sizeof(*fileInfo) + size);
- if (stat(pathname, &statbuf) != 0)
- return NULL;
+ if (!fileInfo) {
+ error("Failed to alloc file info for INCLUDE: %s\n", strerror(errno));
+ return;
+ }
+ fileInfo->node.type = NODE_FILE;
+ strcpy(fileInfo->name, fullPath);
+ free(fullPath);
- /* Reject directories */
- if (S_ISDIR(statbuf.st_mode))
- return NULL;
-
- return fopen(pathname, "rb");
+ newContext((struct FileStackNode *)fileInfo);
+ contextStack->lexerState = lexer_OpenFile(fileInfo->name);
+ if (!contextStack->lexerState)
+ fatalerror("Failed to set up lexer for file include\n");
+ lexer_SetStateAtEOL(contextStack->lexerState);
+ /* We're back at top-level, so most things are reset */
+ contextStack->uniqueID = 0;
+ macro_SetUniqueID(0);
}
-FILE *fstk_FindFile(char const *fname, char **incPathUsed)
+void fstk_RunMacro(char const *macroName, struct MacroArgs *args)
{
- if (fname == NULL)
- return NULL;
+ dbgPrint("Running macro \"%s\"\n", macroName);
- char path[_MAX_PATH];
- FILE *f = getFile(fname);
+ struct Symbol *macro = sym_FindSymbol(macroName);
- if (f) {
- printdep(fname);
- return f;
+ if (!macro) {
+ error("Macro \"%s\" not defined\n", macroName);
+ return;
}
+ if (macro->type != SYM_MACRO) {
+ error("\"%s\" is not a macro\n", macroName);
+ return;
+ }
+ contextStack->macroArgs = macro_GetCurrentArgs();
- for (size_t i = 0; i < NextIncPath; ++i) {
- /*
- * The function snprintf() does not write more than `size` bytes
- * (including the terminating null byte ('\0')). If the output
- * was truncated due to this limit, the return value is the
- * number of characters (excluding the terminating null byte)
- * which would have been written to the final string if enough
- * space had been available. Thus, a return value of `size` or
- * more means that the output was truncated.
- */
- int fullpathlen = snprintf(path, sizeof(path), "%s%s",
- IncludePaths[i], fname);
+ /* Compute total length of this node's name: <base name>::<macro> */
+ size_t reptNameLen = 0;
+ struct FileStackNode const *node = macro->src;
- if (fullpathlen >= (int)sizeof(path))
- continue;
+ if (node->type == NODE_REPT) {
+ struct FileStackReptNode const *reptNode = (struct FileStackReptNode const *)node;
- f = getFile(path);
- if (f) {
- printdep(path);
+ /* 4294967295 = 2^32 - 1, aka UINT32_MAX */
+ reptNameLen += reptNode->reptDepth * strlen("::REPT~4294967295");
+ /* Look for next named node */
+ do {
+ node = node->parent;
+ } while (node->type == NODE_REPT);
+ }
+ struct FileStackNamedNode const *baseNode = (struct FileStackNamedNode const *)node;
+ size_t baseLen = strlen(baseNode->name);
+ size_t macroNameLen = strlen(macro->name);
+ struct FileStackNamedNode *fileInfo = malloc(sizeof(*fileInfo) + baseLen
+ + reptNameLen + 2 + macroNameLen + 1);
- if (incPathUsed)
- *incPathUsed = IncludePaths[i];
- return f;
- }
+ if (!fileInfo) {
+ error("Failed to alloc file info for \"%s\": %s\n", macro->name, strerror(errno));
+ return;
}
+ fileInfo->node.type = NODE_MACRO;
+ /* Print the name... */
+ char *dest = fileInfo->name;
- errno = ENOENT;
- if (oGeneratedMissingIncludes)
- printdep(fname);
- return NULL;
-}
+ memcpy(dest, baseNode->name, baseLen);
+ dest += baseLen;
+ if (node->type == NODE_REPT) {
+ struct FileStackReptNode const *reptNode = (struct FileStackReptNode const *)node;
-/*
- * Set up an include file for parsing
- */
-void fstk_RunInclude(char *tzFileName)
-{
- char *incPathUsed = "";
- FILE *f = fstk_FindFile(tzFileName, &incPathUsed);
+ for (uint32_t i = reptNode->reptDepth; i--; ) {
+ int nbChars = sprintf(dest, "::REPT~%" PRIu32, reptNode->iters[i]);
- if (f == NULL) {
- if (oGeneratedMissingIncludes) {
- oFailedOnMissingInclude = true;
- return;
+ if (nbChars < 0)
+ fatalerror("Failed to write macro invocation info: %s\n",
+ strerror(errno));
+ dest += nbChars;
}
- error("Unable to open included file '%s': %s\n", tzFileName, strerror(errno));
- return;
}
+ *dest++ = ':';
+ *dest++ = ':';
+ memcpy(dest, macro->name, macroNameLen + 1);
- pushcontext();
- nLineNo = 1;
- nCurrentStatus = STAT_isInclude;
- snprintf(tzCurrentFileName, sizeof(tzCurrentFileName), "%s%s",
- incPathUsed, tzFileName);
- if (verbose)
- printf("Assembling %s\n", tzCurrentFileName);
- pCurrentFile = f;
- CurrentFlexHandle = yy_create_buffer(pCurrentFile);
- yy_switch_to_buffer(CurrentFlexHandle);
-
- /* Dirty hack to give the INCLUDE directive a linefeed */
-
- yyunput('\n');
- nLineNo--;
+ newContext((struct FileStackNode *)fileInfo);
+ /* Line minus 1 because buffer begins with a newline */
+ contextStack->lexerState = lexer_OpenFileView(macro->macro, macro->macroSize,
+ macro->fileLine - 1);
+ if (!contextStack->lexerState)
+ fatalerror("Failed to set up lexer for macro invocation\n");
+ lexer_SetStateAtEOL(contextStack->lexerState);
+ contextStack->uniqueID = macro_UseNewUniqueID();
+ macro_UseNewArgs(args);
}
-/*
- * Set up a macro for parsing
- */
-void fstk_RunMacro(char *s, struct MacroArgs *args)
+void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size)
{
- struct Symbol const *sym = sym_FindSymbol(s);
- int nPrintedChars;
+ dbgPrint("Running REPT(%" PRIu32 ")\n", count);
+ if (count == 0)
+ return;
- if (sym == NULL) {
- error("Macro \"%s\" not defined\n", s);
+ uint32_t reptDepth = contextStack->fileInfo->type == NODE_REPT
+ ? ((struct FileStackReptNode *)contextStack->fileInfo)->reptDepth
+ : 0;
+ struct FileStackReptNode *fileInfo = malloc(sizeof(*fileInfo)
+ + (reptDepth + 1) * sizeof(fileInfo->iters[0]));
+
+ if (!fileInfo) {
+ error("Failed to alloc file info for REPT: %s\n", strerror(errno));
return;
}
- if (sym->type != SYM_MACRO) {
- error("\"%s\" is not a macro\n", s);
- return;
- }
+ fileInfo->node.type = NODE_REPT;
+ fileInfo->reptDepth = reptDepth + 1;
+ fileInfo->iters[0] = 1;
+ if (reptDepth)
+ /* Copy all parent iter counts */
+ memcpy(&fileInfo->iters[1],
+ ((struct FileStackReptNode *)contextStack->fileInfo)->iters,
+ reptDepth * sizeof(fileInfo->iters[0]));
- pushcontext();
- macro_SetUniqueID(nMacroCount++);
- /* Minus 1 because there is a newline at the beginning of the buffer */
- nLineNo = sym->fileLine - 1;
- macro_UseNewArgs(args);
- nCurrentStatus = STAT_isMacro;
- nPrintedChars = snprintf(tzCurrentFileName, _MAX_PATH + 1,
- "%s::%s", sym->fileName, s);
- if (nPrintedChars > _MAX_PATH) {
- popcontext();
- fatalerror("File name + macro name is too large to fit into buffer\n");
- }
+ newContext((struct FileStackNode *)fileInfo);
+ /* Correct our line number, which currently points to the `ENDR` line */
+ contextStack->fileInfo->lineNo = reptLineNo;
- pCurrentMacro = sym;
- /* TODO: why is `strlen` being used when there's a macro size field? */
- CurrentFlexHandle = yy_scan_bytes(pCurrentMacro->macro,
- strlen(pCurrentMacro->macro));
- yy_switch_to_buffer(CurrentFlexHandle);
-}
+ contextStack->lexerState = lexer_OpenFileView(body, size, reptLineNo);
+ if (!contextStack->lexerState)
+ fatalerror("Failed to set up lexer for rept block\n");
+ lexer_SetStateAtEOL(contextStack->lexerState);
+ contextStack->uniqueID = macro_UseNewUniqueID();
+ contextStack->nbReptIters = count;
-/*
- * Set up a repeat block for parsing
- */
-void fstk_RunRept(uint32_t count, int32_t nReptLineNo)
-{
- if (count) {
- static const char *tzReptStr = "::REPT~1";
-
- /* For error printing to make sense, fake nLineNo */
- nCurrentREPTBodyLastLine = nLineNo;
- nLineNo = nReptLineNo;
- pushcontext();
- macro_SetUniqueID(nMacroCount++);
- nCurrentREPTBlockCount = count;
- nCurrentStatus = STAT_isREPTBlock;
- nCurrentREPTBlockSize = ulNewMacroSize;
- pCurrentREPTBlock = tzNewMacro;
- nCurrentREPTBodyFirstLine = nReptLineNo + 1;
- nLineNo = nReptLineNo;
-
- if (strlen(tzCurrentFileName) + strlen(tzReptStr) > _MAX_PATH)
- fatalerror("Cannot append \"%s\" to file path\n", tzReptStr);
- strcat(tzCurrentFileName, tzReptStr);
-
- CurrentFlexHandle =
- yy_scan_bytes(pCurrentREPTBlock, nCurrentREPTBlockSize);
- yy_switch_to_buffer(CurrentFlexHandle);
- }
}
-/*
- * Initialize the filestack routines
- */
-void fstk_Init(char *pFileName)
+void fstk_Init(char const *mainPath, size_t maxRecursionDepth)
{
- char tzSymFileName[_MAX_PATH + 1 + 2];
+ struct LexerState *state = lexer_OpenFile(mainPath);
- char *c = pFileName;
- int fileNameIndex = 0;
+ if (!state)
+ fatalerror("Failed to open main file!\n");
+ lexer_SetState(state);
+ char const *fileName = lexer_GetFileName();
+ size_t len = strlen(fileName);
+ struct Context *context = malloc(sizeof(*contextStack));
+ struct FileStackNamedNode *fileInfo = malloc(sizeof(*fileInfo) + len + 1);
- tzSymFileName[fileNameIndex++] = '"';
+ if (!context)
+ fatalerror("Failed to allocate memory for main context: %s\n", strerror(errno));
+ if (!fileInfo)
+ fatalerror("Failed to allocate memory for main file info: %s\n", strerror(errno));
- // minus 2 to account for trailing "\"\0"
- // minus 1 to avoid a buffer overflow in extreme cases
- while (*c && fileNameIndex < sizeof(tzSymFileName) - 2 - 1) {
+ context->fileInfo = (struct FileStackNode *)fileInfo;
+ /* lineNo and reptIter are unused on the top-level context */
+ context->fileInfo->parent = NULL;
+ context->fileInfo->referenced = false;
+ context->fileInfo->type = NODE_FILE;
+ memcpy(fileInfo->name, fileName, len + 1);
- if (*c == '"') {
- tzSymFileName[fileNameIndex++] = '\\';
- }
+ context->parent = NULL;
+ context->lexerState = state;
+ context->uniqueID = 0;
+ macro_SetUniqueID(0);
+ context->nbReptIters = 0;
- tzSymFileName[fileNameIndex++] = *c;
- ++c;
- }
+ /* Now that it's set up properly, register the context */
+ contextStack = context;
- tzSymFileName[fileNameIndex++] = '"';
- tzSymFileName[fileNameIndex] = '\0';
-
- sym_AddString("__FILE__", tzSymFileName);
-
- pFileStack = NULL;
- if (strcmp(pFileName, "-") == 0) {
- pCurrentFile = stdin;
+ /*
+ * Check that max recursion depth won't allow overflowing node `malloc`s
+ * This assumes that the rept node is larger
+ */
+#define DEPTH_LIMIT ((SIZE_MAX - sizeof(struct FileStackReptNode)) / sizeof(uint32_t))
+ if (maxRecursionDepth > DEPTH_LIMIT) {
+ error("Recursion depth may not be higher than %zu, defaulting to "
+ EXPAND_AND_STR(DEFAULT_MAX_DEPTH) "\n", DEPTH_LIMIT);
+ nMaxRecursionDepth = DEFAULT_MAX_DEPTH;
} else {
- pCurrentFile = fopen(pFileName, "rb");
- if (pCurrentFile == NULL)
- fatalerror("Unable to open file '%s': %s\n", pFileName, strerror(errno));
+ nMaxRecursionDepth = maxRecursionDepth;
}
- nFileStackDepth = 0;
-
- nMacroCount = 0;
- nCurrentStatus = STAT_isInclude;
- snprintf(tzCurrentFileName, _MAX_PATH + 1, "%s", pFileName);
- CurrentFlexHandle = yy_create_buffer(pCurrentFile);
- yy_switch_to_buffer(CurrentFlexHandle);
- nLineNo = 1;
+ /* Make sure that the default of 64 is OK, though */
+ assert(DEPTH_LIMIT >= DEFAULT_MAX_DEPTH);
+#undef DEPTH_LIMIT
}
--- a/src/asm/globlex.c
+++ /dev/null
@@ -1,698 +1,0 @@
-/*
- * This file is part of RGBDS.
- *
- * Copyright (c) 1997-2018, Carsten Sorensen and RGBDS contributors.
- *
- * SPDX-License-Identifier: MIT
- */
-
-#include <math.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "asm/asm.h"
-#include "asm/lexer.h"
-#include "asm/macro.h"
-#include "asm/main.h"
-#include "asm/rpn.h"
-#include "asm/section.h"
-#include "asm/warning.h"
-
-#include "helpers.h"
-
-#include "asmy.h"
-
-bool oDontExpandStrings;
-int32_t nGBGfxID = -1;
-int32_t nBinaryID = -1;
-
-static int32_t gbgfx2bin(char ch)
-{
- int32_t i;
-
- for (i = 0; i <= 3; i++) {
- if (CurrentOptions.gbgfx[i] == ch)
- return i;
- }
-
- return 0;
-}
-
-static int32_t binary2bin(char ch)
-{
- int32_t i;
-
- for (i = 0; i <= 1; i++) {
- if (CurrentOptions.binary[i] == ch)
- return i;
- }
-
- return 0;
-}
-
-static int32_t char2bin(char ch)
-{
- if (ch >= 'a' && ch <= 'f')
- return (ch - 'a' + 10);
-
- if (ch >= 'A' && ch <= 'F')
- return (ch - 'A' + 10);
-
- if (ch >= '0' && ch <= '9')
- return (ch - '0');
-
- return 0;
-}
-
-typedef int32_t(*x2bin) (char ch);
-
-static int32_t ascii2bin(char *s)
-{
- char *start = s;
- uint32_t radix = 10;
- uint32_t result = 0;
- x2bin convertfunc = char2bin;
-
- switch (*s) {
- case '$':
- radix = 16;
- s++;
- convertfunc = char2bin;
- break;
- case '&':
- radix = 8;
- s++;
- convertfunc = char2bin;
- break;
- case '`':
- radix = 4;
- s++;
- convertfunc = gbgfx2bin;
- break;
- case '%':
- radix = 2;
- s++;
- convertfunc = binary2bin;
- break;
- default:
- /* Handle below */
- break;
- }
-
- const uint32_t max_q = UINT32_MAX / radix;
- const uint32_t max_r = UINT32_MAX % radix;
-
- if (*s == '\0') {
- /*
- * There are no digits after the radix prefix
- * (or the string is empty, which shouldn't happen).
- */
- error("Invalid integer constant\n");
- } else if (radix == 4) {
- int32_t size = 0;
- int32_t c;
-
- while (*s != '\0') {
- c = convertfunc(*s++);
- result = result * 2 + ((c & 2) << 7) + (c & 1);
- size++;
- }
-
- /*
- * Extending a graphics constant longer than 8 pixels,
- * the Game Boy tile width, produces a nonsensical result.
- */
- if (size > 8) {
- warning(WARNING_LARGE_CONSTANT, "Graphics constant '%s' is too long\n",
- start);
- }
- } else {
- bool overflow = false;
-
- while (*s != '\0') {
- int32_t digit = convertfunc(*s++);
-
- if (result > max_q
- || (result == max_q && digit > max_r)) {
- overflow = true;
- }
- result = result * radix + digit;
- }
-
- if (overflow)
- warning(WARNING_LARGE_CONSTANT, "Integer constant '%s' is too large\n",
- start);
- }
-
- return result;
-}
-
-uint32_t ParseFixedPoint(char *s, uint32_t size)
-{
- uint32_t i;
- uint32_t dot = 0;
-
- for (i = 0; i < size; i++) {
- if (s[i] == '.') {
- dot++;
-
- if (dot == 2)
- break;
- }
- }
-
- yyskipbytes(i);
-
- yylval.nConstValue = (int32_t)(atof(s) * 65536);
-
- return 1;
-}
-
-uint32_t ParseNumber(char *s, uint32_t size)
-{
- char dest[256];
-
- if (size > 255)
- fatalerror("Number token too long\n");
-
- strncpy(dest, s, size);
- dest[size] = 0;
- yylval.nConstValue = ascii2bin(dest);
-
- yyskipbytes(size);
-
- return 1;
-}
-
-/*
- * If the symbol name ends before the end of the macro arg,
- * return a pointer to the rest of the macro arg.
- * Otherwise, return NULL.
- */
-char const *AppendMacroArg(char whichArg, char *dest, size_t *destIndex)
-{
- char const *marg;
-
- if (whichArg == '@')
- marg = macro_GetUniqueIDStr();
- else if (whichArg >= '1' && whichArg <= '9')
- marg = macro_GetArg(whichArg - '0');
- else
- fatalerror("Invalid macro argument '\\%c' in symbol\n", whichArg);
-
- if (!marg)
- fatalerror("Macro argument '\\%c' not defined\n", whichArg);
-
- char ch;
-
- while ((ch = *marg) != 0) {
- if ((ch >= 'a' && ch <= 'z')
- || (ch >= 'A' && ch <= 'Z')
- || (ch >= '0' && ch <= '9')
- || ch == '_'
- || ch == '@'
- || ch == '#'
- || ch == '.') {
- if (*destIndex >= MAXSYMLEN)
- fatalerror("Symbol too long\n");
-
- dest[*destIndex] = ch;
- (*destIndex)++;
- } else {
- return marg;
- }
-
- marg++;
- }
-
- return NULL;
-}
-
-uint32_t ParseSymbol(char *src, uint32_t size)
-{
- char dest[MAXSYMLEN + 1];
- size_t srcIndex = 0;
- size_t destIndex = 0;
- char const *rest = NULL;
-
- while (srcIndex < size) {
- char ch = src[srcIndex++];
-
- if (ch == '\\') {
- /*
- * We don't check if srcIndex is still less than size,
- * but that can only fail to be true when the
- * following char is neither '@' nor a digit.
- * In that case, AppendMacroArg() will catch the error.
- */
- ch = src[srcIndex++];
-
- rest = AppendMacroArg(ch, dest, &destIndex);
- /* If the symbol's end was in the middle of the token */
- if (rest)
- break;
- } else {
- if (destIndex >= MAXSYMLEN)
- fatalerror("Symbol too long\n");
- dest[destIndex++] = ch;
- }
- }
-
- dest[destIndex] = 0;
-
- /* Tell the lexer we read all bytes that we did */
- yyskipbytes(srcIndex);
-
- /*
- * If an escape's expansion left some chars after the symbol's end,
- * such as the `::` in a `Backup\1` expanded to `BackupCamX::`,
- * put those into the buffer.
- * Note that this NEEDS to be done after the `yyskipbytes` above.
- */
- if (rest)
- yyunputstr(rest);
-
- /* If the symbol is an EQUS, expand it */
- if (!oDontExpandStrings) {
- struct Symbol const *sym = sym_FindSymbol(dest);
-
- if (sym && sym->type == SYM_EQUS) {
- char const *s;
-
- lex_BeginStringExpansion(dest);
-
- /* Feed the symbol's contents into the buffer */
- yyunputstr(s = sym_GetStringValue(sym));
-
- /* Lines inserted this way shall not increase nLineNo */
- while (*s) {
- if (*s++ == '\n')
- nLineNo--;
- }
- return 0;
- }
- }
-
- strcpy(yylval.tzSym, dest);
- return 1;
-}
-
-uint32_t PutMacroArg(char *src, uint32_t size)
-{
- char const *s;
-
- yyskipbytes(size);
- if ((size == 2 && src[1] >= '1' && src[1] <= '9')) {
- s = macro_GetArg(src[1] - '0');
-
- if (s != NULL)
- yyunputstr(s);
- else
- error("Macro argument '\\%c' not defined\n", src[1]);
- } else {
- error("Invalid macro argument '\\%c'\n", src[1]);
- }
- return 0;
-}
-
-uint32_t PutUniqueID(char *src, uint32_t size)
-{
- (void)src;
- char const *s;
-
- yyskipbytes(size);
-
- s = macro_GetUniqueIDStr();
-
- if (s != NULL)
- yyunputstr(s);
- else
- error("Macro unique label string not defined\n");
-
- return 0;
-}
-
-enum {
- T_LEX_MACROARG = 3000,
- T_LEX_MACROUNIQUE
-};
-
-const struct sLexInitString lexer_strings[] = {
- {"adc", T_Z80_ADC},
- {"add", T_Z80_ADD},
- {"and", T_Z80_AND},
- {"bit", T_Z80_BIT},
- {"call", T_Z80_CALL},
- {"ccf", T_Z80_CCF},
- {"cpl", T_Z80_CPL},
- {"cp", T_Z80_CP},
- {"daa", T_Z80_DAA},
- {"dec", T_Z80_DEC},
- {"di", T_Z80_DI},
- {"ei", T_Z80_EI},
- {"halt", T_Z80_HALT},
- {"inc", T_Z80_INC},
- {"jp", T_Z80_JP},
- {"jr", T_Z80_JR},
- {"ld", T_Z80_LD},
- {"ldi", T_Z80_LDI},
- {"ldd", T_Z80_LDD},
- {"ldio", T_Z80_LDIO},
- {"ldh", T_Z80_LDIO},
- {"nop", T_Z80_NOP},
- {"or", T_Z80_OR},
- {"pop", T_Z80_POP},
- {"push", T_Z80_PUSH},
- {"res", T_Z80_RES},
- {"reti", T_Z80_RETI},
- {"ret", T_Z80_RET},
- {"rlca", T_Z80_RLCA},
- {"rlc", T_Z80_RLC},
- {"rla", T_Z80_RLA},
- {"rl", T_Z80_RL},
- {"rrc", T_Z80_RRC},
- {"rrca", T_Z80_RRCA},
- {"rra", T_Z80_RRA},
- {"rr", T_Z80_RR},
- {"rst", T_Z80_RST},
- {"sbc", T_Z80_SBC},
- {"scf", T_Z80_SCF},
- {"set", T_POP_SET},
- {"sla", T_Z80_SLA},
- {"sra", T_Z80_SRA},
- {"srl", T_Z80_SRL},
- {"stop", T_Z80_STOP},
- {"sub", T_Z80_SUB},
- {"swap", T_Z80_SWAP},
- {"xor", T_Z80_XOR},
-
- {"nz", T_CC_NZ},
- {"z", T_CC_Z},
- {"nc", T_CC_NC},
- /* Handled in list of registers */
- /* { "c", T_TOKEN_C }, */
-
- {"hli", T_MODE_HL_INC},
- {"hld", T_MODE_HL_DEC},
- {"$ff00+c", T_MODE_HW_C},
- {"$ff00 + c", T_MODE_HW_C},
- {"af", T_MODE_AF},
- {"bc", T_MODE_BC},
- {"de", T_MODE_DE},
- {"hl", T_MODE_HL},
- {"sp", T_MODE_SP},
-
- {"a", T_TOKEN_A},
- {"b", T_TOKEN_B},
- {"c", T_TOKEN_C},
- {"d", T_TOKEN_D},
- {"e", T_TOKEN_E},
- {"h", T_TOKEN_H},
- {"l", T_TOKEN_L},
-
- {"||", T_OP_LOGICOR},
- {"&&", T_OP_LOGICAND},
- {"==", T_OP_LOGICEQU},
- {">", T_OP_LOGICGT},
- {"<", T_OP_LOGICLT},
- {">=", T_OP_LOGICGE},
- {"<=", T_OP_LOGICLE},
- {"!=", T_OP_LOGICNE},
- {"!", T_OP_LOGICNOT},
- {"|", T_OP_OR},
- {"^", T_OP_XOR},
- {"&", T_OP_AND},
- {"<<", T_OP_SHL},
- {">>", T_OP_SHR},
- {"+", T_OP_ADD},
- {"-", T_OP_SUB},
- {"*", T_OP_MUL},
- {"/", T_OP_DIV},
- {"%", T_OP_MOD},
- {"~", T_OP_NOT},
-
- {"def", T_OP_DEF},
-
- {"fragment", T_POP_FRAGMENT},
- {"bank", T_OP_BANK},
- {"align", T_OP_ALIGN},
-
- {"round", T_OP_ROUND},
- {"ceil", T_OP_CEIL},
- {"floor", T_OP_FLOOR},
- {"div", T_OP_FDIV},
- {"mul", T_OP_FMUL},
- {"sin", T_OP_SIN},
- {"cos", T_OP_COS},
- {"tan", T_OP_TAN},
- {"asin", T_OP_ASIN},
- {"acos", T_OP_ACOS},
- {"atan", T_OP_ATAN},
- {"atan2", T_OP_ATAN2},
-
- {"high", T_OP_HIGH},
- {"low", T_OP_LOW},
- {"isconst", T_OP_ISCONST},
-
- {"strcmp", T_OP_STRCMP},
- {"strin", T_OP_STRIN},
- {"strsub", T_OP_STRSUB},
- {"strlen", T_OP_STRLEN},
- {"strcat", T_OP_STRCAT},
- {"strupr", T_OP_STRUPR},
- {"strlwr", T_OP_STRLWR},
-
- {"include", T_POP_INCLUDE},
- {"printt", T_POP_PRINTT},
- {"printi", T_POP_PRINTI},
- {"printv", T_POP_PRINTV},
- {"printf", T_POP_PRINTF},
- {"export", T_POP_EXPORT},
- {"xdef", T_POP_XDEF},
- {"global", T_POP_GLOBAL},
- {"ds", T_POP_DS},
- {"db", T_POP_DB},
- {"dw", T_POP_DW},
- {"dl", T_POP_DL},
- {"section", T_POP_SECTION},
- {"purge", T_POP_PURGE},
-
- {"rsreset", T_POP_RSRESET},
- {"rsset", T_POP_RSSET},
-
- {"incbin", T_POP_INCBIN},
- {"charmap", T_POP_CHARMAP},
- {"newcharmap", T_POP_NEWCHARMAP},
- {"setcharmap", T_POP_SETCHARMAP},
- {"pushc", T_POP_PUSHC},
- {"popc", T_POP_POPC},
-
- {"fail", T_POP_FAIL},
- {"warn", T_POP_WARN},
- {"fatal", T_POP_FATAL},
- {"assert", T_POP_ASSERT},
- {"static_assert", T_POP_STATIC_ASSERT},
-
- {"macro", T_POP_MACRO},
- /* Not needed but we have it here just to protect the name */
- {"endm", T_POP_ENDM},
- {"shift", T_POP_SHIFT},
-
- {"rept", T_POP_REPT},
- /* Not needed but we have it here just to protect the name */
- {"endr", T_POP_ENDR},
-
- {"load", T_POP_LOAD},
- {"endl", T_POP_ENDL},
-
- {"if", T_POP_IF},
- {"else", T_POP_ELSE},
- {"elif", T_POP_ELIF},
- {"endc", T_POP_ENDC},
-
- {"union", T_POP_UNION},
- {"nextu", T_POP_NEXTU},
- {"endu", T_POP_ENDU},
-
- {"wram0", T_SECT_WRAM0},
- {"vram", T_SECT_VRAM},
- {"romx", T_SECT_ROMX},
- {"rom0", T_SECT_ROM0},
- {"hram", T_SECT_HRAM},
- {"wramx", T_SECT_WRAMX},
- {"sram", T_SECT_SRAM},
- {"oam", T_SECT_OAM},
-
- {"rb", T_POP_RB},
- {"rw", T_POP_RW},
- {"equ", T_POP_EQU},
- {"equs", T_POP_EQUS},
-
- /* Handled before in list of CPU instructions */
- /* {"set", T_POP_SET}, */
- {"=", T_POP_EQUAL},
-
- {"pushs", T_POP_PUSHS},
- {"pops", T_POP_POPS},
- {"pusho", T_POP_PUSHO},
- {"popo", T_POP_POPO},
-
- {"opt", T_POP_OPT},
-
- {NULL, 0}
-};
-
-const struct sLexFloat tNumberToken = {
- ParseNumber,
- T_NUMBER
-};
-
-const struct sLexFloat tFixedPointToken = {
- ParseFixedPoint,
- T_NUMBER
-};
-
-const struct sLexFloat tIDToken = {
- ParseSymbol,
- T_ID
-};
-
-const struct sLexFloat tMacroArgToken = {
- PutMacroArg,
- T_LEX_MACROARG
-};
-
-const struct sLexFloat tMacroUniqueToken = {
- PutUniqueID,
- T_LEX_MACROUNIQUE
-};
-
-void setup_lexer(void)
-{
- uint32_t id;
-
- lex_Init();
- lex_AddStrings(lexer_strings);
-
- //Macro arguments
-
- id = lex_FloatAlloc(&tMacroArgToken);
- lex_FloatAddFirstRange(id, '\\', '\\');
- lex_FloatAddSecondRange(id, '1', '9');
- id = lex_FloatAlloc(&tMacroUniqueToken);
- lex_FloatAddFirstRange(id, '\\', '\\');
- lex_FloatAddSecondRange(id, '@', '@');
-
- //Decimal constants
-
- id = lex_FloatAlloc(&tNumberToken);
- lex_FloatAddFirstRange(id, '0', '9');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddRange(id, '0', '9');
-
- //Binary constants
-
- id = lex_FloatAlloc(&tNumberToken);
- nBinaryID = id;
- lex_FloatAddFirstRange(id, '%', '%');
- lex_FloatAddSecondRange(id, CurrentOptions.binary[0],
- CurrentOptions.binary[0]);
- lex_FloatAddSecondRange(id, CurrentOptions.binary[1],
- CurrentOptions.binary[1]);
- lex_FloatAddRange(id, CurrentOptions.binary[0],
- CurrentOptions.binary[0]);
- lex_FloatAddRange(id, CurrentOptions.binary[1],
- CurrentOptions.binary[1]);
-
- //Octal constants
-
- id = lex_FloatAlloc(&tNumberToken);
- lex_FloatAddFirstRange(id, '&', '&');
- lex_FloatAddSecondRange(id, '0', '7');
- lex_FloatAddRange(id, '0', '7');
-
- //Gameboy gfx constants
-
- id = lex_FloatAlloc(&tNumberToken);
- nGBGfxID = id;
- lex_FloatAddFirstRange(id, '`', '`');
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[0],
- CurrentOptions.gbgfx[0]);
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[1],
- CurrentOptions.gbgfx[1]);
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[2],
- CurrentOptions.gbgfx[2]);
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[3],
- CurrentOptions.gbgfx[3]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[0], CurrentOptions.gbgfx[0]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[1], CurrentOptions.gbgfx[1]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[2], CurrentOptions.gbgfx[2]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[3], CurrentOptions.gbgfx[3]);
-
- //Hex constants
-
- id = lex_FloatAlloc(&tNumberToken);
- lex_FloatAddFirstRange(id, '$', '$');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddSecondRange(id, 'A', 'F');
- lex_FloatAddSecondRange(id, 'a', 'f');
- lex_FloatAddRange(id, '0', '9');
- lex_FloatAddRange(id, 'A', 'F');
- lex_FloatAddRange(id, 'a', 'f');
-
- //ID 's
-
- id = lex_FloatAlloc(&tIDToken);
- lex_FloatAddFirstRange(id, 'a', 'z');
- lex_FloatAddFirstRange(id, 'A', 'Z');
- lex_FloatAddFirstRange(id, '_', '_');
- lex_FloatAddSecondRange(id, '.', '.');
- lex_FloatAddSecondRange(id, 'a', 'z');
- lex_FloatAddSecondRange(id, 'A', 'Z');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddSecondRange(id, '_', '_');
- lex_FloatAddSecondRange(id, '\\', '\\');
- lex_FloatAddSecondRange(id, '@', '@');
- lex_FloatAddSecondRange(id, '#', '#');
- lex_FloatAddRange(id, '.', '.');
- lex_FloatAddRange(id, 'a', 'z');
- lex_FloatAddRange(id, 'A', 'Z');
- lex_FloatAddRange(id, '0', '9');
- lex_FloatAddRange(id, '_', '_');
- lex_FloatAddRange(id, '\\', '\\');
- lex_FloatAddRange(id, '@', '@');
- lex_FloatAddRange(id, '#', '#');
-
- //Local ID
-
- id = lex_FloatAlloc(&tIDToken);
- lex_FloatAddFirstRange(id, '.', '.');
- lex_FloatAddSecondRange(id, 'a', 'z');
- lex_FloatAddSecondRange(id, 'A', 'Z');
- lex_FloatAddSecondRange(id, '_', '_');
- lex_FloatAddRange(id, 'a', 'z');
- lex_FloatAddRange(id, 'A', 'Z');
- lex_FloatAddRange(id, '0', '9');
- lex_FloatAddRange(id, '_', '_');
- lex_FloatAddRange(id, '\\', '\\');
- lex_FloatAddRange(id, '@', '@');
- lex_FloatAddRange(id, '#', '#');
-
- // "@"
-
- id = lex_FloatAlloc(&tIDToken);
- lex_FloatAddFirstRange(id, '@', '@');
-
- //Fixed point constants
-
- id = lex_FloatAlloc(&tFixedPointToken);
- lex_FloatAddFirstRange(id, '.', '.');
- lex_FloatAddFirstRange(id, '0', '9');
- lex_FloatAddSecondRange(id, '.', '.');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddRange(id, '.', '.');
- lex_FloatAddRange(id, '0', '9');
-}
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -1,1054 +1,2094 @@
/*
* This file is part of RGBDS.
*
- * Copyright (c) 1997-2019, Carsten Sorensen and RGBDS contributors.
+ * Copyright (c) 2020, Eldred Habert and RGBDS contributors.
*
* SPDX-License-Identifier: MIT
*/
+#include <sys/types.h>
+#include <sys/stat.h>
#include <assert.h>
#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
#include <inttypes.h>
-#include <stdio.h>
+#include <limits.h>
+#include <stdbool.h>
#include <stdint.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
+#include "extern/utf8decoder.h"
+#include "platform.h" /* For `ssize_t` */
+
#include "asm/asm.h"
-#include "asm/fstack.h"
#include "asm/lexer.h"
+#include "asm/fstack.h"
#include "asm/macro.h"
#include "asm/main.h"
#include "asm/rpn.h"
-#include "asm/section.h"
+#include "asm/symbol.h"
+#include "asm/util.h"
#include "asm/warning.h"
+/* Include this last so it gets all type & constant definitions */
+#include "asmy.h" /* For token definitions, generated from asmy.y */
-#include "extern/err.h"
+#ifdef LEXER_DEBUG
+ #define dbgPrint(...) fprintf(stderr, "[lexer] " __VA_ARGS__)
+#else
+ #define dbgPrint(...)
+#endif
-#include "asmy.h"
-#include "platform.h" // strncasecmp, strdup
+/* Neither MSVC nor MinGW provide `mmap` */
+#if defined(_MSC_VER) || defined(__MINGW32__)
+# include <windows.h>
+# include <fileapi.h>
+# include <winbase.h>
+# define MAP_FAILED NULL
+# define mapFile(ptr, fd, path, size) do { \
+ (ptr) = MAP_FAILED; \
+ HANDLE file = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, \
+ FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_RANDOM_ACCESS, NULL); \
+ HANDLE mappingObj; \
+ if (file == INVALID_HANDLE_VALUE) \
+ break; \
+ mappingObj = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, 0, NULL); \
+ if (mappingObj != NULL) { /* CreateFileMappingA() reports failure with NULL */ \
+ (ptr) = MapViewOfFile(mappingObj, FILE_MAP_READ, 0, 0, 0); \
+ CloseHandle(mappingObj); \
+ } \
+ CloseHandle(file); \
+} while (0)
+# define munmap(ptr, size) UnmapViewOfFile((ptr))
-struct sLexString {
- char *tzName;
- uint32_t nToken;
- uint32_t nNameLength;
- struct sLexString *next;
+#else /* defined(_MSC_VER) || defined(__MINGW32__) */
+
+# include <sys/mman.h>
+# define mapFile(ptr, fd, path, size) do { \
+ (ptr) = mmap(NULL, (size), PROT_READ, MAP_PRIVATE, (fd), 0); \
+ \
+ if ((ptr) == MAP_FAILED && errno == ENOTSUP) { \
+ /*
+ * The implementation may not support MAP_PRIVATE; try again with MAP_SHARED
+ * instead, offering, I believe, weaker guarantees about external modifications to
+ * the file while reading it. That's still better than not opening it at all, though
+ */ \
+ if (verbose) \
+ printf("mmap(%s, MAP_PRIVATE) failed, retrying with MAP_SHARED\n", path); \
+ (ptr) = mmap(NULL, (size), PROT_READ, MAP_SHARED, (fd), 0); \
+ } \
+} while (0)
+#endif /* !( defined(_MSC_VER) || defined(__MINGW32__) ) */
+
+/*
+ * Identifiers that are also keywords are listed here. This ONLY applies to ones
+ * that would normally be matched as identifiers! Check out `yylex_NORMAL` to
+ * see how this is used.
+ * Tokens / keywords not handled here are handled in `yylex_NORMAL`'s switch.
+ */
+static struct KeywordMapping {
+ char const *name;
+ int token;
+} const keywords[] = {
+ /*
+ * CAUTION when editing this: adding keywords will probably require extra nodes in the
+ * `keywordDict` array. If you forget to, you will probably trip up an assertion, anyways.
+ * Also, all entries in this array must be in uppercase for the dict to build correctly.
+ */
+ {"ADC", T_Z80_ADC},
+ {"ADD", T_Z80_ADD},
+ {"AND", T_Z80_AND},
+ {"BIT", T_Z80_BIT},
+ {"CALL", T_Z80_CALL},
+ {"CCF", T_Z80_CCF},
+ {"CPL", T_Z80_CPL},
+ {"CP", T_Z80_CP},
+ {"DAA", T_Z80_DAA},
+ {"DEC", T_Z80_DEC},
+ {"DI", T_Z80_DI},
+ {"EI", T_Z80_EI},
+ {"HALT", T_Z80_HALT},
+ {"INC", T_Z80_INC},
+ {"JP", T_Z80_JP},
+ {"JR", T_Z80_JR},
+ {"LD", T_Z80_LD},
+ {"LDI", T_Z80_LDI},
+ {"LDD", T_Z80_LDD},
+ {"LDIO", T_Z80_LDIO},
+ {"LDH", T_Z80_LDIO},
+ {"NOP", T_Z80_NOP},
+ {"OR", T_Z80_OR},
+ {"POP", T_Z80_POP},
+ {"PUSH", T_Z80_PUSH},
+ {"RES", T_Z80_RES},
+ {"RETI", T_Z80_RETI},
+ {"RET", T_Z80_RET},
+ {"RLCA", T_Z80_RLCA},
+ {"RLC", T_Z80_RLC},
+ {"RLA", T_Z80_RLA},
+ {"RL", T_Z80_RL},
+ {"RRC", T_Z80_RRC},
+ {"RRCA", T_Z80_RRCA},
+ {"RRA", T_Z80_RRA},
+ {"RR", T_Z80_RR},
+ {"RST", T_Z80_RST},
+ {"SBC", T_Z80_SBC},
+ {"SCF", T_Z80_SCF},
+ {"SET", T_POP_SET},
+ {"SLA", T_Z80_SLA},
+ {"SRA", T_Z80_SRA},
+ {"SRL", T_Z80_SRL},
+ {"STOP", T_Z80_STOP},
+ {"SUB", T_Z80_SUB},
+ {"SWAP", T_Z80_SWAP},
+ {"XOR", T_Z80_XOR},
+
+ {"NZ", T_CC_NZ},
+ {"Z", T_CC_Z},
+ {"NC", T_CC_NC},
+ /* Handled in list of registers */
+ /* { "C", T_CC_C }, */
+
+ {"AF", T_MODE_AF},
+ {"BC", T_MODE_BC},
+ {"DE", T_MODE_DE},
+ {"HL", T_MODE_HL},
+ {"SP", T_MODE_SP},
+ {"HLD", T_MODE_HL_DEC},
+ {"HLI", T_MODE_HL_INC},
+
+ {"A", T_TOKEN_A},
+ {"B", T_TOKEN_B},
+ {"C", T_TOKEN_C},
+ {"D", T_TOKEN_D},
+ {"E", T_TOKEN_E},
+ {"H", T_TOKEN_H},
+ {"L", T_TOKEN_L},
+
+ {"DEF", T_OP_DEF},
+
+ {"FRAGMENT", T_POP_FRAGMENT},
+ {"BANK", T_OP_BANK},
+ {"ALIGN", T_OP_ALIGN},
+
+ {"ROUND", T_OP_ROUND},
+ {"CEIL", T_OP_CEIL},
+ {"FLOOR", T_OP_FLOOR},
+ {"DIV", T_OP_FDIV},
+ {"MUL", T_OP_FMUL},
+ {"SIN", T_OP_SIN},
+ {"COS", T_OP_COS},
+ {"TAN", T_OP_TAN},
+ {"ASIN", T_OP_ASIN},
+ {"ACOS", T_OP_ACOS},
+ {"ATAN", T_OP_ATAN},
+ {"ATAN2", T_OP_ATAN2},
+
+ {"HIGH", T_OP_HIGH},
+ {"LOW", T_OP_LOW},
+ {"ISCONST", T_OP_ISCONST},
+
+ {"STRCMP", T_OP_STRCMP},
+ {"STRIN", T_OP_STRIN},
+ {"STRSUB", T_OP_STRSUB},
+ {"STRLEN", T_OP_STRLEN},
+ {"STRCAT", T_OP_STRCAT},
+ {"STRUPR", T_OP_STRUPR},
+ {"STRLWR", T_OP_STRLWR},
+
+ {"INCLUDE", T_POP_INCLUDE},
+ {"PRINTT", T_POP_PRINTT},
+ {"PRINTI", T_POP_PRINTI},
+ {"PRINTV", T_POP_PRINTV},
+ {"PRINTF", T_POP_PRINTF},
+ {"EXPORT", T_POP_EXPORT},
+ {"XDEF", T_POP_XDEF},
+ {"GLOBAL", T_POP_GLOBAL},
+ {"DS", T_POP_DS},
+ {"DB", T_POP_DB},
+ {"DW", T_POP_DW},
+ {"DL", T_POP_DL},
+ {"SECTION", T_POP_SECTION},
+ {"PURGE", T_POP_PURGE},
+
+ {"RSRESET", T_POP_RSRESET},
+ {"RSSET", T_POP_RSSET},
+
+ {"INCBIN", T_POP_INCBIN},
+ {"CHARMAP", T_POP_CHARMAP},
+ {"NEWCHARMAP", T_POP_NEWCHARMAP},
+ {"SETCHARMAP", T_POP_SETCHARMAP},
+ {"PUSHC", T_POP_PUSHC},
+ {"POPC", T_POP_POPC},
+
+ {"FAIL", T_POP_FAIL},
+ {"WARN", T_POP_WARN},
+ {"FATAL", T_POP_FATAL},
+ {"ASSERT", T_POP_ASSERT},
+ {"STATIC_ASSERT", T_POP_STATIC_ASSERT},
+
+ {"MACRO", T_POP_MACRO},
+ {"ENDM", T_POP_ENDM},
+ {"SHIFT", T_POP_SHIFT},
+
+ {"REPT", T_POP_REPT},
+ {"ENDR", T_POP_ENDR},
+
+ {"LOAD", T_POP_LOAD},
+ {"ENDL", T_POP_ENDL},
+
+ {"IF", T_POP_IF},
+ {"ELSE", T_POP_ELSE},
+ {"ELIF", T_POP_ELIF},
+ {"ENDC", T_POP_ENDC},
+
+ {"UNION", T_POP_UNION},
+ {"NEXTU", T_POP_NEXTU},
+ {"ENDU", T_POP_ENDU},
+
+ {"WRAM0", T_SECT_WRAM0},
+ {"VRAM", T_SECT_VRAM},
+ {"ROMX", T_SECT_ROMX},
+ {"ROM0", T_SECT_ROM0},
+ {"HRAM", T_SECT_HRAM},
+ {"WRAMX", T_SECT_WRAMX},
+ {"SRAM", T_SECT_SRAM},
+ {"OAM", T_SECT_OAM},
+
+ {"RB", T_POP_RB},
+ {"RW", T_POP_RW},
+ {"EQU", T_POP_EQU},
+ {"EQUS", T_POP_EQUS},
+
+ /* Handled before in list of CPU instructions */
+ /* {"SET", T_POP_SET}, */
+
+ {"PUSHS", T_POP_PUSHS},
+ {"POPS", T_POP_POPS},
+ {"PUSHO", T_POP_PUSHO},
+ {"POPO", T_POP_POPO},
+
+ {"OPT", T_POP_OPT}
};
-#define pLexBufferRealStart (pCurrentBuffer->pBufferRealStart)
-#define pLexBuffer (pCurrentBuffer->pBuffer)
-#define AtLineStart (pCurrentBuffer->oAtLineStart)
+static bool isWhitespace(int c)
+{
+ return c == ' ' || c == '\t';
+}
-#define SAFETYMARGIN 1024
+#define LEXER_BUF_SIZE 42 /* TODO: determine a sane value for this */
+/* This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB */
+static_assert(LEXER_BUF_SIZE <= SSIZE_MAX, "Lexer buffer size is too large");
-#define BOM_SIZE 3
+struct Expansion {
+ struct Expansion *firstChild;
+ struct Expansion *next;
+ char *name;
+ char const *contents;
+ size_t len;
+ size_t totalLen;
+ size_t distance; /* Distance between the beginning of this expansion and of its parent */
+ uint8_t skip; /* How many extra characters to skip after the expansion is over */
+};
-struct sLexFloat tLexFloat[32];
-struct sLexString *tLexHash[LEXHASHSIZE];
-YY_BUFFER_STATE pCurrentBuffer;
-uint32_t nLexMaxLength; // max length of all keywords and operators
+struct LexerState {
+ char const *path;
-uint32_t tFloatingSecondChar[256];
-uint32_t tFloatingFirstChar[256];
-uint32_t tFloatingChars[256];
-uint32_t nFloating;
-enum eLexerState lexerstate = LEX_STATE_NORMAL;
+ /* mmap()-dependent IO state */
+ bool isMmapped;
+ union {
+ struct { /* If mmap()ed */
+ char *ptr; /* Technically `const` during the lexer's execution */
+ off_t size;
+ off_t offset;
+ bool isReferenced; /* If a macro in this file requires not unmapping it */
+ };
+ struct { /* Otherwise */
+ int fd;
+ size_t index; /* Read index into the buffer */
+ char buf[LEXER_BUF_SIZE]; /* Circular buffer */
+ size_t nbChars; /* Number of "fresh" chars in the buffer */
+ };
+ };
-struct sStringExpansionPos *pCurrentStringExpansion;
-static unsigned int nNbStringExpansions;
+ /* Common state */
+ bool isFile;
-/* UTF-8 byte order mark */
-static const unsigned char bom[BOM_SIZE] = { 0xEF, 0xBB, 0xBF };
+ enum LexerMode mode;
+ bool atLineStart;
+ uint32_t lineNo;
+ uint32_t colNo;
+ int lastToken;
-void upperstring(char *s)
+ bool capturing; /* Whether the text being lexed should be captured */
+ size_t captureSize; /* Amount of text captured */
+ char *captureBuf; /* Buffer to send the captured text to if non-NULL */
+ size_t captureCapacity; /* Size of the buffer above */
+
+ bool disableMacroArgs;
+ size_t macroArgScanDistance; /* Max distance already scanned for macro args */
+ bool expandStrings;
+ struct Expansion *expansions;
+ size_t expansionOfs; /* Offset into the current top-level expansion (negative = before) */
+};
+
+struct LexerState *lexerState = NULL;
+struct LexerState *lexerStateEOL = NULL;
+
+static void initState(struct LexerState *state)
{
- while (*s) {
- *s = toupper(*s);
- s++;
- }
+ state->mode = LEXER_NORMAL;
+ state->atLineStart = true; /* yylex() will init colNo due to this */
+ state->lastToken = 0;
+
+ state->capturing = false;
+ state->captureBuf = NULL;
+
+ state->disableMacroArgs = false;
+ state->macroArgScanDistance = 0;
+ state->expandStrings = true;
+ state->expansions = NULL;
+ state->expansionOfs = 0;
}
-void lowerstring(char *s)
+struct LexerState *lexer_OpenFile(char const *path)
{
- while (*s) {
- *s = tolower(*s);
- s++;
+ dbgPrint("Opening file \"%s\"\n", path);
+
+ bool isStdin = !strcmp(path, "-");
+ struct LexerState *state = malloc(sizeof(*state));
+ struct stat fileInfo;
+
+ /* Give stdin a nicer file name */
+ if (isStdin)
+ path = "<stdin>";
+ if (!state) {
+ error("Failed to allocate memory for lexer state: %s\n", strerror(errno));
+ return NULL;
}
+ if (!isStdin && stat(path, &fileInfo) != 0) {
+ error("Failed to stat file \"%s\": %s\n", path, strerror(errno));
+ free(state);
+ return NULL;
+ }
+ state->path = path;
+ state->isFile = true;
+ state->fd = isStdin ? STDIN_FILENO : open(path, O_RDONLY);
+ state->isMmapped = false; /* By default, assume it won't be mmap()ed */
+ if (!isStdin && fileInfo.st_size > 0) {
+ /* Try using `mmap` for better performance */
+
+ /*
+ * Important: do NOT assign to `state->ptr` directly, to avoid a cast that may
+ * alter an eventual `MAP_FAILED` value. It would also invalidate `state->fd`,
+ * being on the other side of the union.
+ */
+ void *mappingAddr;
+
+ mapFile(mappingAddr, state->fd, state->path, fileInfo.st_size);
+ if (mappingAddr == MAP_FAILED) {
+ /* If mmap()ing failed, try again using another method (below) */
+ state->isMmapped = false;
+ } else {
+ /* IMPORTANT: the `union` mandates this is accessed before other members! */
+ close(state->fd);
+
+ state->isMmapped = true;
+ state->ptr = mappingAddr;
+ state->size = fileInfo.st_size;
+ state->offset = 0;
+ state->isReferenced = false; /* lexer_DeleteState() reads this before munmap()ing */
+ if (verbose)
+ printf("File %s successfully mmap()ped\n", path);
+ }
+ }
+ if (!state->isMmapped) {
+ /* Sometimes mmap() fails or isn't available, so have a fallback */
+ if (verbose)
+ printf("File %s opened as regular, errno reports \"%s\"\n",
+ path, strerror(errno));
+ state->index = 0;
+ state->nbChars = 0;
+ }
+
+ initState(state);
+ state->lineNo = 0; /* Will be incremented at first line start */
+ return state;
}
-void yyskipbytes(uint32_t count)
+struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo)
{
- pLexBuffer += count;
+ dbgPrint("Opening view on buffer \"%.*s\"[...]\n", size < 16 ? (int)size : 16, buf);
+
+ struct LexerState *state = malloc(sizeof(*state));
+
+ if (!state) {
+ error("Failed to allocate memory for lexer state: %s\n", strerror(errno));
+ return NULL;
+ }
+ // TODO: init `path`
+
+ state->isFile = false;
+ state->isMmapped = true; /* It's not *really* mmap()ed, but it behaves the same */
+ state->ptr = buf;
+ state->size = size;
+ state->offset = 0;
+
+ initState(state);
+ state->lineNo = lineNo; /* Will be incremented at first line start */
+ return state;
}
-void yyunputbytes(uint32_t count)
+void lexer_RestartRept(uint32_t lineNo)
{
- pLexBuffer -= count;
+ dbgPrint("Restarting REPT\n");
+ lexerState->offset = 0;
+ initState(lexerState);
+ lexerState->lineNo = lineNo;
}
-void yyunput(char c)
+void lexer_DeleteState(struct LexerState *state)
{
- if (pLexBuffer <= pLexBufferRealStart)
- fatalerror("Buffer safety margin exceeded\n");
-
- *(--pLexBuffer) = c;
+ if (!state->isMmapped)
+ close(state->fd);
+ else if (state->isFile && !state->isReferenced)
+ munmap(state->ptr, state->size);
+ free(state);
}
-void yyunputstr(const char *s)
-{
- int32_t len;
-
- len = strlen(s);
-
+struct KeywordDictNode {
/*
- * It would be undefined behavior to subtract `len` from pLexBuffer and
- * potentially have it point outside of pLexBufferRealStart's buffer,
- * this is why the check is done this way.
- * Refer to https://github.com/rednex/rgbds/pull/411#discussion_r319779797
+ * The identifier charset is (currently) 44 characters big. By storing entries for the
+ * entire printable ASCII charset, minus lower-case due to case-insensitivity,
+ * we only waste (0x60 - 0x20) - 44 = 20 entries per node, which should be acceptable.
+ * In turn, this allows greatly simplifying checking an index into this array,
+ * which should help speed up the lexer.
*/
- if (pLexBuffer - pLexBufferRealStart < len)
- fatalerror("Buffer safety margin exceeded\n");
+ uint16_t children[0x60 - ' '];
+ struct KeywordMapping const *keyword;
+/* Since the keyword structure is invariant, the min number of nodes is known at compile time */
+} keywordDict[338] = {0}; /* Make sure to keep this correct when adding keywords! */
- pLexBuffer -= len;
-
- memcpy(pLexBuffer, s, len);
+/* Convert a char into its index into the dict */
+static inline uint8_t dictIndex(char c)
+{
+ /* Translate lowercase to uppercase (roughly) */
+ if (c > 0x60)
+ c = c - ('a' - 'A');
+ return c - ' ';
}
-/*
- * Marks that a new string expansion with name `tzName` ends here
- * Enforces recursion depth
- */
-void lex_BeginStringExpansion(const char *tzName)
+void lexer_Init(void)
{
- if (++nNbStringExpansions > nMaxRecursionDepth)
- fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth);
+ /*
+ * Build the dictionary of keywords. This could be done at compile time instead, however:
+ * - Doing so manually is a task nobody wants to undertake
+ * - It would be massively hard to read
+ * - Doing it within CC or CPP would be quite non-trivial
+ * - Doing it externally would require some extra work to use only POSIX tools
+ * - The startup overhead isn't much compared to the program's
+ */
+ uint16_t usedNodes = 1;
- struct sStringExpansionPos *pNewStringExpansion =
- malloc(sizeof(*pNewStringExpansion));
- char *tzNewExpansionName = strdup(tzName);
+ for (size_t i = 0; i < sizeof(keywords) / sizeof(*keywords); i++) {
+ uint16_t nodeID = 0;
- if (!pNewStringExpansion || !tzNewExpansionName)
- fatalerror("Could not allocate memory to expand '%s'\n", tzName);
+ /* Walk the dictionary, creating intermediate nodes for the keyword */
+ for (char const *ptr = keywords[i].name; *ptr; ptr++) {
+ /* We should be able to assume all entries are well-formed */
+ if (keywordDict[nodeID].children[*ptr - ' '] == 0) {
+ /*
+ * If this gets tripped up, set the size of keywordDict to
+ * something high, compile with `-DPRINT_NODE_COUNT` (see below),
+ * and set the size to that.
+ */
+ assert(usedNodes < sizeof(keywordDict) / sizeof(*keywordDict));
- pNewStringExpansion->tzName = tzNewExpansionName;
- pNewStringExpansion->pBuffer = pLexBufferRealStart;
- pNewStringExpansion->pBufferPos = pLexBuffer;
- pNewStringExpansion->pParent = pCurrentStringExpansion;
+ /* There is no node at that location, grab one from the pool */
+ keywordDict[nodeID].children[*ptr - ' '] = usedNodes;
+ usedNodes++;
+ }
+ nodeID = keywordDict[nodeID].children[*ptr - ' '];
+ }
- pCurrentStringExpansion = pNewStringExpansion;
+ /* This assumes that no two keywords have the same name */
+ keywordDict[nodeID].keyword = &keywords[i];
+ }
+
+#ifdef PRINT_NODE_COUNT /* For the maintainer to check how many nodes are needed */
+ printf("Lexer keyword dictionary: %zu keywords in %u nodes (pool size %zu)\n",
+ sizeof(keywords) / sizeof(*keywords), usedNodes,
+ sizeof(keywordDict) / sizeof(*keywordDict));
+#endif
}
-void yy_switch_to_buffer(YY_BUFFER_STATE buf)
+void lexer_SetMode(enum LexerMode mode)
{
- pCurrentBuffer = buf;
+ lexerState->mode = mode;
}
-void yy_set_state(enum eLexerState i)
+void lexer_ToggleStringExpansion(bool enable)
{
- lexerstate = i;
+ lexerState->expandStrings = enable;
}
-void yy_delete_buffer(YY_BUFFER_STATE buf)
+/* Functions for the actual lexer to obtain characters */
+
+static void reallocCaptureBuf(void)
{
- free(buf->pBufferStart - SAFETYMARGIN);
- free(buf);
+ if (lexerState->captureCapacity == SIZE_MAX)
+ fatalerror("Cannot grow capture buffer past %zu bytes\n", SIZE_MAX);
+ else if (lexerState->captureCapacity > SIZE_MAX / 2)
+ lexerState->captureCapacity = SIZE_MAX;
+ else
+ lexerState->captureCapacity *= 2;
+ lexerState->captureBuf = realloc(lexerState->captureBuf, lexerState->captureCapacity);
+ if (!lexerState->captureBuf)
+ fatalerror("realloc error while resizing capture buffer: %s\n", strerror(errno));
}
/*
- * Maintains the following invariants:
- * 1. nBufferSize < capacity
- * 2. The buffer is terminated with 0
- * 3. nBufferSize is the size without the terminator
+ * The multiple evaluations of `retvar` causing side effects is INTENTIONAL, and
+ * required for example by `lexer_dumpStringExpansions`. It is however only
+ * evaluated once per level, and only then.
+ *
+ * This uses the concept of "X macros": you must #define LOOKUP_PRE_NEST and
+ * LOOKUP_POST_NEST before invoking this (and #undef them right after), and
+ * those macros will be expanded at the corresponding points in the loop.
+ * This is necessary because there are at least 3 places which need to iterate
+ * through expansions while performing custom actions
*/
-static void yy_buffer_append(YY_BUFFER_STATE buf, size_t capacity, char c)
+#define lookupExpansion(retvar, dist) do { \
+ struct Expansion *exp = lexerState->expansions; \
+ \
+ for (;;) { \
+ /* Find the closest expansion whose end is after the target */ \
+ while (exp && exp->totalLen + exp->distance <= (dist)) { \
+ (dist) -= exp->totalLen - exp->skip; /* Drop its text, re-add the chars it replaced */ \
+ exp = exp->next; \
+ } \
+ \
+ /* If there is none, or it begins after the target, return the previous level */ \
+ if (!exp || exp->distance > (dist)) \
+ break; \
+ \
+ /* We know we are inside of that expansion */ \
+ (dist) -= exp->distance; /* Distances are relative to their parent */ \
+ \
+ /* Otherwise, register this expansion and repeat the process */ \
+ LOOKUP_PRE_NEST(exp); \
+ (retvar) = exp; \
+ if (!exp->firstChild) /* If there are no children, this is it */ \
+ break; \
+ exp = exp->firstChild; \
+ \
+ LOOKUP_POST_NEST(exp); \
+ } \
+} while (0)
+
+static struct Expansion *getExpansionAtDistance(size_t *distance)
{
- assert(buf->pBufferStart[buf->nBufferSize] == 0);
- assert(buf->nBufferSize + 1 < capacity);
+ struct Expansion *expansion = NULL; /* Top level has no "previous" level */
- buf->pBufferStart[buf->nBufferSize++] = c;
- buf->pBufferStart[buf->nBufferSize] = 0;
-}
+#define LOOKUP_PRE_NEST(exp)
+#define LOOKUP_POST_NEST(exp)
+ struct Expansion *exp = lexerState->expansions;
-static void yy_buffer_append_newlines(YY_BUFFER_STATE buf, size_t capacity)
-{
- /* Add newline if file doesn't end with one */
- if (buf->nBufferSize == 0
- || buf->pBufferStart[buf->nBufferSize - 1] != '\n')
- yy_buffer_append(buf, capacity, '\n');
+ for (;;) {
+ /* Find the closest expansion whose end is after the target */
+ while (exp && exp->totalLen + exp->distance <= *distance) {
+ *distance -= exp->totalLen - exp->skip;
+ exp = exp->next;
+ }
- /* Add newline if \ will eat the last newline */
- if (buf->nBufferSize >= 2) {
- size_t pos = buf->nBufferSize - 2;
+ /* If there is none, or it begins after the target, return the previous level */
+ if (!exp || exp->distance > *distance)
+ break;
- /* Skip spaces and tabs */
- while (pos > 0 && (buf->pBufferStart[pos] == ' '
- || buf->pBufferStart[pos] == '\t'))
- pos--;
+ /* We know we are inside of that expansion */
+ *distance -= exp->distance; /* Distances are relative to their parent */
- if (buf->pBufferStart[pos] == '\\')
- yy_buffer_append(buf, capacity, '\n');
+ /* Otherwise, register this expansion and repeat the process */
+ LOOKUP_PRE_NEST(exp);
+ expansion = exp;
+ if (!exp->firstChild) /* If there are no children, this is it */
+ break;
+ exp = exp->firstChild;
+
+ LOOKUP_POST_NEST(exp);
}
+#undef LOOKUP_PRE_NEST
+#undef LOOKUP_POST_NEST
+
+ return expansion;
}
-YY_BUFFER_STATE yy_scan_bytes(char const *mem, uint32_t size)
+static void beginExpansion(size_t distance, uint8_t skip,
+ char const *str, size_t size, char const *name)
{
- YY_BUFFER_STATE pBuffer = malloc(sizeof(struct yy_buffer_state));
+ distance += lexerState->expansionOfs; /* Distance argument is relative to read offset! */
+ /* Increase the total length of all parents, and return the topmost one */
+ struct Expansion *parent = NULL;
+ unsigned int depth = 0;
- if (pBuffer == NULL)
- fatalerror("%s: Out of memory!\n", __func__);
+#define LOOKUP_PRE_NEST(exp) (exp)->totalLen += size - skip
+#define LOOKUP_POST_NEST(exp) do { \
+ if (name && ++depth >= nMaxRecursionDepth) \
+ fatalerror("Recursion limit (%u) exceeded\n", nMaxRecursionDepth); \
+} while (0)
+ lookupExpansion(parent, distance);
+#undef LOOKUP_PRE_NEST
+#undef LOOKUP_POST_NEST
+ struct Expansion **insertPoint = parent ? &parent->firstChild : &lexerState->expansions;
- size_t capacity = size + 3; /* space for 2 newlines and terminator */
+ /* We know we are in none of the children expansions: add ourselves, keeping it sorted */
+ while (*insertPoint && (*insertPoint)->distance < distance)
+ insertPoint = &(*insertPoint)->next;
- pBuffer->pBufferRealStart = malloc(capacity + SAFETYMARGIN);
+ *insertPoint = malloc(sizeof(**insertPoint));
+ if (!*insertPoint)
+ fatalerror("Unable to allocate new expansion: %s\n", strerror(errno));
+ (*insertPoint)->firstChild = NULL;
+ (*insertPoint)->next = NULL; /* Expansions are always performed left to right */
+ (*insertPoint)->name = name ? strdup(name) : NULL;
+ (*insertPoint)->contents = str;
+ (*insertPoint)->len = size;
+ (*insertPoint)->totalLen = size;
+ (*insertPoint)->distance = distance;
+ (*insertPoint)->skip = skip;
- if (pBuffer->pBufferRealStart == NULL)
- fatalerror("%s: Out of memory for buffer!\n", __func__);
+ /* If expansion is the new closest one, update offset */
+ if (insertPoint == &lexerState->expansions)
+ lexerState->expansionOfs = 0;
+}
- pBuffer->pBufferStart = pBuffer->pBufferRealStart + SAFETYMARGIN;
- pBuffer->pBuffer = pBuffer->pBufferRealStart + SAFETYMARGIN;
- memcpy(pBuffer->pBuffer, mem, size);
- pBuffer->pBuffer[size] = 0;
- pBuffer->nBufferSize = size;
- yy_buffer_append_newlines(pBuffer, capacity);
- pBuffer->oAtLineStart = 1;
+static void freeExpansion(struct Expansion *expansion)
+{
+ struct Expansion *child = expansion->firstChild;
- return pBuffer;
+ while (child) {
+ struct Expansion *next = child->next;
+
+ freeExpansion(child);
+ child = next;
+ }
+ free(expansion->name);
+ free(expansion);
}
-YY_BUFFER_STATE yy_create_buffer(FILE *f)
+static char const *expandMacroArg(char name, size_t distance)
{
- YY_BUFFER_STATE pBuffer = malloc(sizeof(struct yy_buffer_state));
+ char const *str;
- if (pBuffer == NULL)
- fatalerror("%s: Out of memory!\n", __func__);
+ if (name == '@')
+ str = macro_GetUniqueIDStr();
+ else if (name == '0')
+ fatalerror("Invalid macro argument '\\0'\n");
+ else
+ str = macro_GetArg(name - '0');
+ if (!str)
+ fatalerror("Macro argument '\\%c' not defined\n", name);
- size_t size = 0, capacity = -1;
- char *buf = NULL;
+ beginExpansion(distance, 2, str, strlen(str), NULL);
+ return str;
+}
- /*
- * Check if we can get the file size without implementation-defined
- * behavior:
- *
- * From ftell(3p):
- * [On error], ftell() and ftello() shall return −1, and set errno to
- * indicate the error.
- *
- * The ftell() and ftello() functions shall fail if: [...]
- * ESPIPE The file descriptor underlying stream is associated with a
- * pipe, FIFO, or socket.
- *
- * From fseek(3p):
- * The behavior of fseek() on devices which are incapable of seeking
- * is implementation-defined.
- */
- if (ftell(f) != -1) {
- fseek(f, 0, SEEK_END);
- capacity = ftell(f);
- rewind(f);
+/* If at any point we need more than 255 characters of lookahead, something went VERY wrong. */
+static int peekInternal(uint8_t distance)
+{
+ if (distance >= LEXER_BUF_SIZE)
+ fatalerror("Internal lexer error: buffer has insufficient size for peeking (%"
+ PRIu8 " >= %u)\n", distance, LEXER_BUF_SIZE);
+
+ size_t ofs = lexerState->expansionOfs + distance;
+ struct Expansion const *expansion = getExpansionAtDistance(&ofs);
+
+ if (expansion) {
+ assert(ofs < expansion->len);
+ return expansion->contents[ofs];
}
- // If ftell errored or the block above wasn't executed
- if (capacity == -1)
- capacity = 4096;
- // Handle 0-byte files gracefully
- else if (capacity == 0)
- capacity = 1;
+ distance = ofs;
- do {
- if (buf == NULL || size >= capacity) {
- if (buf)
- capacity *= 2;
- /* Give extra room for 2 newlines and terminator */
- buf = realloc(buf, capacity + SAFETYMARGIN + 3);
+ if (lexerState->isMmapped) {
+ if (lexerState->offset + distance >= lexerState->size)
+ return EOF;
- if (buf == NULL)
- fatalerror("%s: Out of memory for buffer!\n",
- __func__);
- }
+ return (unsigned char)lexerState->ptr[lexerState->offset + distance];
+ }
- char *bufpos = buf + SAFETYMARGIN + size;
- size_t read_count = fread(bufpos, 1, capacity - size, f);
+ if (lexerState->nbChars <= distance) {
+ /* Buffer isn't full enough, read some chars in */
+ size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */
- if (read_count == 0 && !feof(f))
- fatalerror("%s: fread error\n", __func__);
+ /* Compute the index we'll start writing to */
+ size_t writeIndex = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE;
+ ssize_t nbCharsRead = 0, totalCharsRead = 0;
- size += read_count;
- } while (!feof(f));
+#define readChars(size) do { /* Read up to `size` bytes at `writeIndex`, updating the refill counters */ \
+ /* This buffer overflow made me lose WEEKS of my life. Never again. */ \
+ assert(writeIndex + (size) <= LEXER_BUF_SIZE); \
+ nbCharsRead = read(lexerState->fd, &lexerState->buf[writeIndex], (size)); \
+ if (nbCharsRead == -1) /* `errno` is an int: it must be converted for the `%s` specifier */ \
+ fatalerror("Error while reading \"%s\": %s\n", lexerState->path, strerror(errno)); \
+ totalCharsRead += nbCharsRead; \
+ writeIndex += nbCharsRead; \
+ if (writeIndex == LEXER_BUF_SIZE) /* Wrap around once the end of the buffer is reached */ \
+ writeIndex = 0; \
+ target -= nbCharsRead; \
+} while (0)
- pBuffer->pBufferRealStart = buf;
- pBuffer->pBufferStart = buf + SAFETYMARGIN;
- pBuffer->pBuffer = buf + SAFETYMARGIN;
- pBuffer->pBuffer[size] = 0;
- pBuffer->nBufferSize = size;
+ /* If the range to fill passes over the buffer wrapping point, we need two reads */
+ if (writeIndex + target > LEXER_BUF_SIZE) {
+ size_t nbExpectedChars = LEXER_BUF_SIZE - writeIndex;
- /* This is added here to make the buffer scaling above easy to express,
- * while taking the newline space into account
- * for the yy_buffer_append_newlines() call below.
- */
- capacity += 3;
+ readChars(nbExpectedChars);
+ /* If the read was incomplete, don't perform a second read */
+ if (nbCharsRead < nbExpectedChars)
+ target = 0;
+ }
+ if (target != 0)
+ readChars(target);
- /* Skip UTF-8 byte order mark. */
- if (pBuffer->nBufferSize >= BOM_SIZE
- && !memcmp(pBuffer->pBuffer, bom, BOM_SIZE))
- pBuffer->pBuffer += BOM_SIZE;
+#undef readChars
- /* Convert all line endings to LF and spaces */
+ lexerState->nbChars += totalCharsRead;
- char *mem = pBuffer->pBuffer;
- int32_t lineCount = 0;
+ /* If there aren't enough chars even after refilling, give up */
+ if (lexerState->nbChars <= distance)
+ return EOF;
+ }
+ return (unsigned char)lexerState->buf[(lexerState->index + distance) % LEXER_BUF_SIZE];
+}
- while (*mem) {
- if ((mem[0] == '\\') && (mem[1] == '\"' || mem[1] == '\\')) {
- mem += 2;
- } else {
- /* LF CR and CR LF */
- if (((mem[0] == '\n') && (mem[1] == '\r'))
- || ((mem[0] == '\r') && (mem[1] == '\n'))) {
- *mem++ = ' ';
- *mem++ = '\n';
- lineCount++;
- /* LF and CR */
- } else if ((mem[0] == '\n') || (mem[0] == '\r')) {
- *mem++ = '\n';
- lineCount++;
+static int peek(uint8_t distance)
+{
+ int c = peekInternal(distance);
+
+ if (distance >= lexerState->macroArgScanDistance) {
+ lexerState->macroArgScanDistance = distance + 1; /* Do not consider again */
+ /* If enabled and character is a backslash, check for a macro arg */
+ if (!lexerState->disableMacroArgs && c == '\\') {
+ distance++;
+ lexerState->macroArgScanDistance++;
+ c = peekInternal(distance);
+ if (c == '@' || (c >= '0' && c <= '9')) {
+ /* Expand the argument and return its first character */
+ char const *str = expandMacroArg(c, distance - 1);
+
+ /*
+ * Assuming macro args can't be recursive (I'll be damned if a way
+ * is found...), then we mark the entire macro arg as scanned;
+ * however, the two macro arg characters (\1) will be ignored,
+ * so they shouldn't be counted in the scan distance!
+ */
+ lexerState->macroArgScanDistance += strlen(str) - 2;
+ /* WARNING: this assumes macro args can't be empty!! */
+ c = str[0];
} else {
- mem++;
+ c = '\\';
}
}
}
+ return c;
+}
- if (mem != pBuffer->pBuffer + size) {
- nLineNo = lineCount + 1;
- fatalerror("Found null character\n");
+static void shiftChars(uint8_t distance)
+{
+ if (lexerState->capturing) {
+ if (lexerState->captureBuf) {
+ if (lexerState->captureSize + distance >= lexerState->captureCapacity)
+ reallocCaptureBuf();
+ /* TODO: improve this? */
+ for (uint8_t i = 0; i < distance; i++)
+ lexerState->captureBuf[lexerState->captureSize++] = peek(i);
+ } else {
+ lexerState->captureSize += distance;
+ }
}
- /* Remove comments */
+ lexerState->macroArgScanDistance -= distance;
- mem = pBuffer->pBuffer;
- bool instring = false;
+ /* FIXME: this may not be too great, as only the top level is considered... */
- while (*mem) {
- if (*mem == '\"')
- instring = !instring;
+ /*
+ * The logic is as follows:
+ * - Any characters up to the expansion need to be consumed in the file
+ * - If some remain after that, advance the offset within the expansion
+ * - If that goes *past* the expansion, then leftovers shall be consumed in the file
+ * - If we went past the expansion, we're back to square one, and should re-do all
+ */
+nextExpansion:
+ if (lexerState->expansions) {
+ /* If the read cursor reaches into the expansion, update offset */
+ if (distance > lexerState->expansions->distance) {
+ /* distance = <file chars (expansion distance)> + <expansion chars> */
+ lexerState->expansionOfs += distance - lexerState->expansions->distance;
+ distance = lexerState->expansions->distance; /* Nb chars to read in file */
+ /* Now, check if the expansion finished being read */
+ if (lexerState->expansionOfs >= lexerState->expansions->totalLen) {
+ /* Add the leftovers to the distance */
+ distance += lexerState->expansionOfs;
+ distance -= lexerState->expansions->totalLen;
+ /* Also add in the post-expansion skip */
+ distance += lexerState->expansions->skip;
+ /* Move on to the next expansion */
+ struct Expansion *next = lexerState->expansions->next;
- if ((mem[0] == '\\') && (mem[1] == '\"' || mem[1] == '\\')) {
- mem += 2;
- } else if (instring) {
- mem++;
- } else {
- /* Comments that start with ; anywhere in a line */
- if (*mem == ';') {
- while (!((*mem == '\n') || (*mem == '\0')))
- *mem++ = ' ';
- /* Comments that start with * at the start of a line */
- } else if ((mem[0] == '\n') && (mem[1] == '*')) {
- warning(WARNING_OBSOLETE,
- "'*' is deprecated for comments, please use ';' instead\n");
- mem++;
- while (!((*mem == '\n') || (*mem == '\0')))
- *mem++ = ' ';
- } else {
- mem++;
+ freeExpansion(lexerState->expansions);
+ lexerState->expansions = next;
+ /* Reset the offset for the next expansion */
+ lexerState->expansionOfs = 0;
+ /* And repeat, in case we also go into or over the next expansion */
+ goto nextExpansion;
}
}
+ /* Getting closer to the expansion */
+ lexerState->expansions->distance -= distance;
+ /* Now, `distance` is how many bytes to move forward **in the file** */
}
- yy_buffer_append_newlines(pBuffer, capacity);
- pBuffer->oAtLineStart = 1;
- return pBuffer;
+ if (lexerState->isMmapped) {
+ lexerState->offset += distance;
+ } else {
+ lexerState->index += distance;
+ lexerState->colNo += distance;
+ /* Wrap around if necessary */
+ if (lexerState->index >= LEXER_BUF_SIZE)
+ lexerState->index %= LEXER_BUF_SIZE;
+ assert(lexerState->nbChars >= distance);
+ lexerState->nbChars -= distance;
+ }
}
-uint32_t lex_FloatAlloc(const struct sLexFloat *token)
+static int nextChar(void)
{
- tLexFloat[nFloating] = *token;
+ int c = peek(0);
- return (1 << (nFloating++));
+ /* If not at EOF, advance read position */
+ if (c != EOF)
+ shiftChars(1);
+ return c;
}
-/*
- * Make sure that only non-zero ASCII characters are used. Also, check if the
- * start is greater than the end of the range.
- */
-bool lex_CheckCharacterRange(uint16_t start, uint16_t end)
-{
- if (start > end || start < 1 || end > 127) {
- error("Invalid character range (start: %" PRIu16 ", end: %" PRIu16 ")\n",
- start, end);
- return false;
- }
- return true;
-}
+/* "Services" provided by the lexer to the rest of the program */
-void lex_FloatDeleteRange(uint32_t id, uint16_t start, uint16_t end)
+char const *lexer_GetFileName(void)
{
- if (lex_CheckCharacterRange(start, end)) {
- while (start <= end) {
- tFloatingChars[start] &= ~id;
- start++;
- }
- }
+ return lexerState ? lexerState->path : NULL;
}
-void lex_FloatAddRange(uint32_t id, uint16_t start, uint16_t end)
+uint32_t lexer_GetLineNo(void)
{
- if (lex_CheckCharacterRange(start, end)) {
- while (start <= end) {
- tFloatingChars[start] |= id;
- start++;
- }
- }
+ return lexerState->lineNo;
}
-void lex_FloatDeleteFirstRange(uint32_t id, uint16_t start, uint16_t end)
+uint32_t lexer_GetColNo(void)
{
- if (lex_CheckCharacterRange(start, end)) {
- while (start <= end) {
- tFloatingFirstChar[start] &= ~id;
- start++;
- }
- }
+ return lexerState->colNo;
}
-void lex_FloatAddFirstRange(uint32_t id, uint16_t start, uint16_t end)
+void lexer_DumpStringExpansions(void)
{
- if (lex_CheckCharacterRange(start, end)) {
- while (start <= end) {
- tFloatingFirstChar[start] |= id;
- start++;
- }
- }
+ if (!lexerState)
+ return;
+ struct Expansion *stack[nMaxRecursionDepth + 1];
+ struct Expansion *expansion; /* Temp var for `lookupExpansion` */
+ unsigned int depth = 0;
+ size_t distance = lexerState->expansionOfs;
+
+#define LOOKUP_PRE_NEST(exp) do { \
+ /* Only register EQUS expansions, not string args */ \
+ if ((exp)->name) \
+ stack[depth++] = (exp); \
+} while (0)
+#define LOOKUP_POST_NEST(exp)
+ lookupExpansion(expansion, distance);
+ (void)expansion;
+#undef LOOKUP_PRE_NEST
+#undef LOOKUP_POST_NEST
+
+ while (depth--)
+ fprintf(stderr, "while expanding symbol \"%s\"\n", stack[depth]->name);
}
-void lex_FloatDeleteSecondRange(uint32_t id, uint16_t start, uint16_t end)
+/* Function to discard all of a line's comments */
+
+static void discardComment(void)
{
- if (lex_CheckCharacterRange(start, end)) {
- while (start <= end) {
- tFloatingSecondChar[start] &= ~id;
- start++;
- }
+ dbgPrint("Discarding comment\n");
+ lexerState->disableMacroArgs = true;
+ for (;;) {
+ int c = peek(0);
+
+ if (c == EOF || c == '\r' || c == '\n')
+ break;
+ shiftChars(1);
}
+ lexerState->disableMacroArgs = false;
}
-void lex_FloatAddSecondRange(uint32_t id, uint16_t start, uint16_t end)
+/* Function to read a line continuation */
+
+static void readLineContinuation(void)
{
- if (lex_CheckCharacterRange(start, end)) {
- while (start <= end) {
- tFloatingSecondChar[start] |= id;
- start++;
+ dbgPrint("Beginning line continuation\n");
+ for (;;) {
+ int c = peek(0);
+
+ if (isWhitespace(c)) {
+ shiftChars(1);
+ } else if (c == '\r' || c == '\n') {
+ shiftChars(1);
+ if (c == '\r' && peek(0) == '\n')
+ shiftChars(1);
+ if (!lexerState->expansions
+ || lexerState->expansions->distance)
+ lexerState->lineNo++;
+ return;
+ } else if (c == ';') {
+ discardComment();
+ } else {
+ error("Begun line continuation, but encountered character '%s'\n",
+ print(c));
+ return;
}
}
}
-static struct sLexFloat *lexgetfloat(uint32_t nFloatMask)
+/* Functions to lex numbers of various radixes */
+
+static void readNumber(int radix, int32_t baseValue)
{
- if (nFloatMask == 0)
- fatalerror("Internal error in %s\n", __func__);
+ uint32_t value = baseValue;
- int32_t i = 0;
+ for (;;) {
+ int c = peek(0);
- while ((nFloatMask & 1) == 0) {
- nFloatMask >>= 1;
- i++;
+ if (c < '0' || c > '0' + radix - 1)
+ break;
+ if (value > (UINT32_MAX - (c - '0')) / radix)
+ warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n");
+ value = value * radix + (c - '0');
+
+ shiftChars(1);
}
- return &tLexFloat[i];
+ yylval.nConstValue = value;
}
-static uint32_t lexcalchash(char *s)
+static void readFractionalPart(void) /* Reads decimal digits into the low 16 bits of yylval.nConstValue (shifted up by 16) */
{
- uint32_t hash = 0;
+ uint32_t value = 0, divisor = 1; /* The fraction read so far is value / divisor */
- while (*s)
- hash = (hash * 283) ^ toupper(*s++);
+ dbgPrint("Reading fractional part\n");
+ for (;;) {
+ int c = peek(0);
- return hash % LEXHASHSIZE;
+ if (c < '0' || c > '9')
+ break;
+ shiftChars(1);
+ if (divisor > (UINT32_MAX - (c - '0')) / 10) {
+ warning(WARNING_LARGE_CONSTANT,
+ "Precision of fixed-point constant is too large\n");
+ /* Discard any additional digits */
+ while (c = peek(0), c >= '0' && c <= '9')
+ shiftChars(1);
+ break;
+ }
+ value = value * 10 + (c - '0');
+ divisor *= 10;
+ }
+
+ if (yylval.nConstValue > INT16_MAX || yylval.nConstValue < INT16_MIN)
+ warning(WARNING_LARGE_CONSTANT, "Magnitude of fixed-point constant is too large\n");
+
+ /* Cast to unsigned avoids UB if shifting discards bits */
+ yylval.nConstValue = (uint32_t)yylval.nConstValue << 16;
+ /* Widen to 64 bits: `value * 65536` would wrap in 32 bits as soon as value >= 65536 */
+ uint16_t fractional = (uint64_t)value * 65536 / divisor;
+
+ yylval.nConstValue |= fractional * (yylval.nConstValue >= 0 ? 1 : -1);
}
-void lex_Init(void)
+char const *binDigits;
+
+static void readBinaryNumber(void)
{
- uint32_t i;
+ uint32_t value = 0;
- for (i = 0; i < LEXHASHSIZE; i++)
- tLexHash[i] = NULL;
+ dbgPrint("Reading binary number with digits [%c,%c]\n", binDigits[0], binDigits[1]);
+ for (;;) {
+ int c = peek(0);
+ int bit;
- for (i = 0; i < 256; i++) {
- tFloatingFirstChar[i] = 0;
- tFloatingSecondChar[i] = 0;
- tFloatingChars[i] = 0;
+ if (c == binDigits[0])
+ bit = 0;
+ else if (c == binDigits[1])
+ bit = 1;
+ else
+ break;
+ if (value > (UINT32_MAX - bit) / 2)
+ warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n");
+ value = value * 2 + bit;
+
+ shiftChars(1);
}
- nLexMaxLength = 0;
- nFloating = 0;
-
- pCurrentStringExpansion = NULL;
- nNbStringExpansions = 0;
+ yylval.nConstValue = value;
}
-void lex_AddStrings(const struct sLexInitString *lex)
+static void readHexNumber(void)
{
- while (lex->tzName) {
- struct sLexString **ppHash;
- uint32_t hash = lexcalchash(lex->tzName);
+ uint32_t value = 0;
+ bool empty = true;
- ppHash = &tLexHash[hash];
- while (*ppHash)
- ppHash = &((*ppHash)->next);
+ dbgPrint("Reading hex number\n");
+ for (;;) {
+ int c = peek(0);
- *ppHash = malloc(sizeof(struct sLexString));
- if (*ppHash == NULL)
- fatalerror("Out of memory!\n");
+ if (c >= 'a' && c <= 'f') /* Convert letters to right after digits */
+ c = c - 'a' + 10;
+ else if (c >= 'A' && c <= 'F')
+ c = c - 'A' + 10;
+ else if (c >= '0' && c <= '9')
+ c = c - '0';
+ else
+ break;
- (*ppHash)->tzName = (char *)strdup(lex->tzName);
- if ((*ppHash)->tzName == NULL)
- fatalerror("Out of memory!\n");
+ if (value > (UINT32_MAX - c) / 16)
+ warning(WARNING_LARGE_CONSTANT, "Integer constant is too large\n");
+ value = value * 16 + c;
- (*ppHash)->nNameLength = strlen(lex->tzName);
- (*ppHash)->nToken = lex->nToken;
- (*ppHash)->next = NULL;
+ shiftChars(1);
+ empty = false;
+ }
- upperstring((*ppHash)->tzName);
+ if (empty)
+ error("Invalid integer constant, no digits after '$'\n");
- if ((*ppHash)->nNameLength > nLexMaxLength)
- nLexMaxLength = (*ppHash)->nNameLength;
-
- lex++;
- }
+ yylval.nConstValue = value;
}
-/*
- * Gets the "float" mask and "float" length.
- * "Float" refers to the token type of a token that is not a keyword.
- * The character classes floatingFirstChar, floatingSecondChar, and
- * floatingChars are defined separately for each token type.
- * It uses bit masks to match against a set of simple regular expressions
- * of the form /[floatingFirstChar]([floatingSecondChar][floatingChars]*)?/.
- * The token types with the longest match from the current position in the
- * buffer will have their bits set in the float mask.
- */
-void yylex_GetFloatMaskAndFloatLen(uint32_t *pnFloatMask, uint32_t *pnFloatLen)
+char const *gfxDigits;
+
+static void readGfxConstant(void)
{
- /*
- * Note that '\0' should always have a bit mask of 0 in the "floating"
- * tables, so it doesn't need to be checked for separately.
- */
+ uint32_t bp0 = 0, bp1 = 0;
+ uint8_t width = 0;
- char *s = pLexBuffer;
- uint32_t nOldFloatMask = 0;
- uint32_t nFloatMask = tFloatingFirstChar[(uint8_t)*s];
+ dbgPrint("Reading gfx constant with digits [%c,%c,%c,%c]\n",
+ gfxDigits[0], gfxDigits[1], gfxDigits[2], gfxDigits[3]);
+ for (;;) {
+ int c = peek(0);
+ uint32_t pixel;
- if (nFloatMask != 0) {
- s++;
- nOldFloatMask = nFloatMask;
- nFloatMask &= tFloatingSecondChar[(uint8_t)*s];
+ if (c == gfxDigits[0])
+ pixel = 0;
+ else if (c == gfxDigits[1])
+ pixel = 1;
+ else if (c == gfxDigits[2])
+ pixel = 2;
+ else if (c == gfxDigits[3])
+ pixel = 3;
+ else
+ break;
- while (nFloatMask != 0) {
- s++;
- nOldFloatMask = nFloatMask;
- nFloatMask &= tFloatingChars[(uint8_t)*s];
+ if (width < 8) {
+ bp0 = bp0 << 1 | (pixel & 1);
+ bp1 = bp1 << 1 | (pixel >> 1);
}
+ if (width < 9)
+ width++;
+ shiftChars(1);
}
- *pnFloatMask = nOldFloatMask;
- *pnFloatLen = (uint32_t)(s - pLexBuffer);
+ if (width == 0)
+ error("Invalid graphics constant, no digits after '`'\n");
+ else if (width == 9)
+ warning(WARNING_LARGE_CONSTANT,
+ "Graphics constant is too long, only 8 first pixels considered\n");
+
+ yylval.nConstValue = bp1 << 8 | bp0;
}
-/*
- * Gets the longest keyword/operator from the current position in the buffer.
- */
-struct sLexString *yylex_GetLongestFixed(void)
-{
- struct sLexString *pLongestFixed = NULL;
- char *s = pLexBuffer;
- uint32_t hash = 0;
- uint32_t length = 0;
+/* Function to read identifiers & keywords */
- while (length < nLexMaxLength && *s) {
- hash = (hash * 283) ^ toupper(*s);
- s++;
- length++;
-
- struct sLexString *lex = tLexHash[hash % LEXHASHSIZE];
-
- while (lex) {
- if (lex->nNameLength == length
- && strncasecmp(pLexBuffer, lex->tzName, length) == 0) {
- pLongestFixed = lex;
- break;
- }
- lex = lex->next;
- }
- }
-
- return pLongestFixed;
+static bool startsIdentifier(int c)
+{
+ return c == '_' || c == '.' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); /* ASCII letter, '.' or '_' */
}
-size_t CopyMacroArg(char *dest, size_t maxLength, char c)
+static int readIdentifier(char firstChar)
{
+ dbgPrint("Reading identifier or keyword\n");
+ /* Lex while checking for a keyword */
+ yylval.tzSym[0] = firstChar;
+ uint16_t nodeID = keywordDict[0].children[dictIndex(firstChar)];
+ int tokenType = firstChar == '.' ? T_LOCAL_ID : T_ID;
size_t i;
- char const *s;
- if (c == '@')
- s = macro_GetUniqueIDStr();
- else if (c >= '1' && c <= '9')
- s = macro_GetArg(c - '0');
- else
- return 0;
+ for (i = 1; ; i++) {
+ int c = peek(0);
- if (s == NULL)
- fatalerror("Macro argument '\\%c' not defined\n", c);
+ /* If that char isn't in the symbol charset, end */
+ if ((c > '9' || c < '0')
+ && (c > 'Z' || c < 'A')
+ && (c > 'z' || c < 'a')
+ && c != '#' && c != '.' && c != '@' && c != '_')
+ break;
+ shiftChars(1);
- // TODO: `strncpy`, nay?
- for (i = 0; s[i] != 0; i++) {
- if (i >= maxLength)
- fatalerror("Macro argument too long to fit buffer\n");
+ /* Write the char to the identifier's name */
+ if (i < sizeof(yylval.tzSym) - 1)
+ yylval.tzSym[i] = c;
- dest[i] = s[i];
+ /* If the char was a dot, mark the identifier as local */
+ if (c == '.')
+ tokenType = T_LOCAL_ID;
+
+ /* Attempt to traverse the tree to check for a keyword */
+ if (nodeID) /* Do nothing if matching already failed */
+ nodeID = keywordDict[nodeID].children[dictIndex(c)];
}
- return i;
-}
+ if (i > sizeof(yylval.tzSym) - 1) {
+ warning(WARNING_LONG_STR, "Symbol name too long, got truncated\n");
+ i = sizeof(yylval.tzSym) - 1;
+ }
+ yylval.tzSym[i] = '\0'; /* Terminate the string */
+ dbgPrint("Ident/keyword = \"%s\"\n", yylval.tzSym);
-static inline void yylex_StringWriteChar(char *s, size_t index, char c)
-{
- if (index >= MAXSTRLEN)
- fatalerror("String too long\n");
+ if (keywordDict[nodeID].keyword)
+ return keywordDict[nodeID].keyword->token;
- s[index] = c;
+ return tokenType;
}
-static inline void yylex_SymbolWriteChar(char *s, size_t index, char c)
-{
- if (index >= MAXSYMLEN)
- fatalerror("Symbol too long\n");
+/* Functions to read strings */
- s[index] = c;
-}
+enum PrintType { /* Number format selected by the one-letter `<type>:` prefix of an interpolation */
+ TYPE_NONE, /* No prefix given; numbers are formatted as `$` followed by uppercase hex */
+ TYPE_DECIMAL, /* d */
+ TYPE_UPPERHEX, /* X */
+ TYPE_LOWERHEX, /* x */
+ TYPE_BINARY, /* b */
+};
-/*
- * Trims white space at the end of a string.
- * The index parameter is the index of the 0 at the end of the string.
- */
-void yylex_TrimEnd(char *s, size_t index)
+static void intToString(char *dest, size_t bufSize, struct Symbol const *sym, enum PrintType type)
{
- int32_t i = (int32_t)index - 1;
+ uint32_t value = sym_GetConstantSymValue(sym); /* Value formatted into `dest` (at most `bufSize` bytes) */
+ int fullLength; /* Untruncated length of the formatted value; negative if snprintf failed */
- while ((i >= 0) && (s[i] == ' ' || s[i] == '\t')) {
- s[i] = 0;
- i--;
+ /* The formats table has no binary entry: build the digits by hand, least significant first */
+ if (type == TYPE_BINARY) {
+ char binary[33]; /* 32 bits + 1 terminator */
+ char *write_ptr = binary + 32;
+
+ fullLength = 0;
+ binary[32] = 0;
+ do {
+ *(--write_ptr) = (value & 1) + '0';
+ value >>= 1;
+ fullLength++;
+ } while (value);
+ snprintf(dest, bufSize, "%s", write_ptr); /* Unlike strncpy, always terminates and accepts bufSize == 0 */
+ } else {
+ static char const * const formats[] = {
+ [TYPE_NONE] = "$%" PRIX32,
+ [TYPE_DECIMAL] = "%" PRId32,
+ [TYPE_UPPERHEX] = "%" PRIX32,
+ [TYPE_LOWERHEX] = "%" PRIx32
+ };
+
+ fullLength = snprintf(dest, bufSize, formats[type], value);
+ if (fullLength < 0) {
+ error("snprintf encoding error: %s\n", strerror(errno));
+ dest[0] = '\0';
+ }
}
+
+ if (fullLength >= 0 && (size_t)fullLength >= bufSize) /* An snprintf failure was already reported above */
+ warning(WARNING_LONG_STR, "Interpolated symbol %s too long to fit buffer\n",
+ sym->name);
}
-size_t yylex_ReadBracketedSymbol(char *dest, size_t index)
+static char const *readInterpolation(void)
{
- char sym[MAXSYMLEN + 1];
- char ch;
+ char symName[MAXSYMLEN + 1];
size_t i = 0;
- size_t length, maxLength;
- const char *mode = NULL;
+ enum PrintType type = TYPE_NONE;
- for (ch = *pLexBuffer;
- ch != '}' && ch != '"' && ch != '\n';
- ch = *(++pLexBuffer)) {
- if (ch == '\\') {
- ch = *(++pLexBuffer);
- maxLength = MAXSYMLEN - i;
- length = CopyMacroArg(&sym[i], maxLength, ch);
+ for (;;) {
+ int c = peek(0);
- if (length != 0)
- i += length;
- else
- fatalerror("Illegal character escape '%c'\n", ch);
- } else if (ch == '{') {
- /* Handle nested symbols */
- ++pLexBuffer;
- i += yylex_ReadBracketedSymbol(sym, i);
- --pLexBuffer;
- } else if (ch == ':' && !mode) { /* Only grab 1st colon */
- /* Use a whitelist of modes, which does prevent the
- * use of some features such as precision,
- * but also avoids a security flaw
- */
- const char *acceptedModes = "bxXd";
- /* Binary isn't natively supported,
- * so it's handled differently
- */
- static const char * const formatSpecifiers[] = {
- "", "%" PRIx32, "%" PRIX32, "%" PRId32
- };
- /* Prevent reading out of bounds! */
- const char *designatedMode;
+ if (c == '{') { /* Nested interpolation */
+ shiftChars(1);
+ char const *inner = readInterpolation();
- if (i != 1)
- fatalerror("Print types are exactly 1 character long\n");
-
- designatedMode = strchr(acceptedModes, sym[i - 1]);
- if (!designatedMode)
- fatalerror("Illegal print type '%c'\n", sym[i - 1]);
- mode = formatSpecifiers[designatedMode - acceptedModes];
- /* Begin writing the symbol again */
- i = 0;
+ if (inner) {
+ while (*inner) {
+ if (i == sizeof(symName))
+ break;
+ symName[i++] = *inner++;
+ }
+ }
+ } else if (c == EOF || c == '\r' || c == '\n' || c == '"') {
+ error("Missing }\n");
+ break;
+ } else if (c == '}') {
+ shiftChars(1);
+ break;
+ } else if (c == ':' && type == TYPE_NONE) { /* Print type, only once */
+ if (i != 1) {
+ error("Print types are exactly 1 character long\n");
+ } else {
+ switch (symName[0]) {
+ case 'b':
+ type = TYPE_BINARY;
+ break;
+ case 'd':
+ type = TYPE_DECIMAL;
+ break;
+ case 'X':
+ type = TYPE_UPPERHEX;
+ break;
+ case 'x':
+ type = TYPE_LOWERHEX;
+ break;
+ default:
+ error("Invalid print type '%s'\n", print(symName[0]));
+ }
+ }
+ i = 0; /* Now that type has been set, restart at beginning of string */
+ shiftChars(1);
} else {
- yylex_SymbolWriteChar(sym, i++, ch);
+ if (i < sizeof(symName)) /* Allow writing an extra char to flag overflow */
+ symName[i++] = c;
+ shiftChars(1);
}
}
- /* Properly terminate the string */
- yylex_SymbolWriteChar(sym, i, 0);
+ if (i == sizeof(symName)) {
+ warning(WARNING_LONG_STR, "Symbol name too long\n");
+ i--;
+ }
+ symName[i] = '\0';
- /* It's assumed we're writing to a T_STRING */
- maxLength = MAXSTRLEN - index;
- length = symvaluetostring(&dest[index], maxLength, sym, mode);
+ struct Symbol const *sym = sym_FindSymbol(symName);
- if (*pLexBuffer == '}')
- pLexBuffer++;
- else
- fatalerror("Missing }\n");
+ if (!sym) {
+ error("Interpolated symbol \"%s\" does not exist\n", symName);
+ } else if (sym->type == SYM_EQUS) {
+ if (type != TYPE_NONE)
+ error("Print types are only allowed for numbers\n");
+ return sym_GetStringValue(sym);
+ } else if (sym_IsNumeric(sym)) {
+ static char buf[33]; /* Worst case of 32 digits + terminator */
- return length;
+ intToString(buf, sizeof(buf), sym, type);
+ return buf;
+ } else {
+ error("Only numerical and string symbols can be interpolated\n");
+ }
+ return NULL;
}
-static void yylex_ReadQuotedString(void)
+static void readString(void)
{
- size_t index = 0;
- size_t length, maxLength;
+ size_t i = 0;
- while (*pLexBuffer != '"' && *pLexBuffer != '\n') {
- char ch = *pLexBuffer++;
+ dbgPrint("Reading string\n");
+ for (;;) {
+ int c = peek(0);
- if (ch == '\\') {
- ch = *pLexBuffer++;
+ switch (c) {
+ case '"':
+ shiftChars(1);
+ if (i == sizeof(yylval.tzString)) {
+ i--;
+ warning(WARNING_LONG_STR, "String constant too long\n");
+ }
+ yylval.tzString[i] = '\0';
+ dbgPrint("Read string \"%s\"\n", yylval.tzString);
+ return;
+ case '\r':
+ case '\n': /* Do not shift these! */
+ case EOF:
+ if (i == sizeof(yylval.tzString)) {
+ i--;
+ warning(WARNING_LONG_STR, "String constant too long\n");
+ }
+ yylval.tzString[i] = '\0';
+ error("Unterminated string\n");
+ dbgPrint("Read string \"%s\"\n", yylval.tzString);
+ return;
- switch (ch) {
+ case '\\': /* Character escape */
+ c = peek(1);
+ switch (c) {
+ case '\\': /* Return that character unchanged */
+ case '"':
+ case '{':
+ case '}':
+ shiftChars(1);
+ break;
case 'n':
- ch = '\n';
+ c = '\n';
+ shiftChars(1);
break;
case 'r':
- ch = '\r';
+ c = '\r';
+ shiftChars(1);
break;
case 't':
- ch = '\t';
+ c = '\t';
+ shiftChars(1);
break;
- case '\\':
- ch = '\\';
+
+ case ' ':
+ case '\r':
+ case '\n':
+ shiftChars(1); /* Shift the backslash */
+ readLineContinuation();
+ continue;
+
+ case EOF: /* Can't really print that one */
+ error("Illegal character escape at end of input\n");
+ c = '\\';
break;
- case '"':
- ch = '"';
- break;
- case ',':
- ch = ',';
- break;
- case '{':
- ch = '{';
- break;
- case '}':
- ch = '}';
- break;
default:
- maxLength = MAXSTRLEN - index;
- length = CopyMacroArg(&yylval.tzString[index],
- maxLength, ch);
+ error("Illegal character escape '%s'\n", print(c));
+ c = '\\';
+ break;
+ }
+ break;
- if (length != 0)
- index += length;
- else
- fatalerror("Illegal character escape '%c'\n", ch);
+ case '{': /* Symbol interpolation */
+ shiftChars(1);
+ char const *ptr = readInterpolation();
- ch = 0;
- break;
+ if (ptr) {
+ while (*ptr) {
+ if (i == sizeof(yylval.tzString))
+ break;
+ yylval.tzString[i++] = *ptr++;
+ }
}
- } else if (ch == '{') {
- // Get bracketed symbol within string.
- index += yylex_ReadBracketedSymbol(yylval.tzString,
- index);
- ch = 0;
+ continue; /* Do not copy an additional character */
+
+ /* Regular characters will just get copied */
}
+ if (i < sizeof(yylval.tzString)) /* Copy one extra to flag overflow */
+ yylval.tzString[i++] = c;
+ shiftChars(1);
+ }
+}
- if (ch)
- yylex_StringWriteChar(yylval.tzString, index++, ch);
+/* Function to report one character's worth of garbage bytes */
+
+static char const *reportGarbageChar(unsigned char firstByte)
+{
+ static char bytes[6 + 2 + 1]; /* Max size of a UTF-8 encoded code point, plus "''\0" */
+ /* First, attempt UTF-8 decoding */
+ uint32_t state = 0; /* UTF8_ACCEPT */
+ uint32_t codepoint;
+ uint8_t size = 0; /* Number of additional bytes to shift */
+
+ bytes[1] = firstByte; /* No need to init the rest of the array */
+ decode(&state, &codepoint, firstByte);
+ while (state != 0 && state != 1 /* UTF8_REJECT */) {
+ int c = peek(size++);
+
+ if (c == EOF)
+ break;
+ bytes[size + 1] = c;
+ decode(&state, &codepoint, c);
}
- yylex_StringWriteChar(yylval.tzString, index, 0);
+ if (state == 0 && (codepoint > UCHAR_MAX || isprint((unsigned char)codepoint))) {
+ /* Character is valid, printable UTF-8! */
+ shiftChars(size);
+ bytes[0] = '\'';
+ bytes[size + 2] = '\'';
+ bytes[size + 3] = '\0';
+ return bytes;
+ }
- if (*pLexBuffer == '"')
- pLexBuffer++;
- else
- fatalerror("Unterminated string\n");
+ /* The character isn't valid UTF-8, so we'll only print that first byte */
+ if (isprint(firstByte)) {
+ /* bytes[1] = firstByte; */
+ bytes[0] = '\'';
+ bytes[2] = '\'';
+ bytes[3] = '\0';
+ return bytes;
+ }
+ /* Well then, print its hex value */
+ static char const hexChars[16] = "0123456789ABCDEF";
+
+ bytes[0] = '0';
+ bytes[1] = 'x';
+ bytes[2] = hexChars[firstByte >> 4];
+ bytes[3] = hexChars[firstByte & 0x0f];
+ bytes[4] = '\0';
+ return bytes;
}
-static uint32_t yylex_NORMAL(void)
+/* Lexer core */
+
+static int yylex_NORMAL(void)
{
- struct sLexString *pLongestFixed = NULL;
- uint32_t nFloatMask, nFloatLen;
- uint32_t linestart = AtLineStart;
+ dbgPrint("Lexing in normal mode, line=%" PRIu32 ", col=%" PRIu32 "\n",
+ lexer_GetLineNo(), lexer_GetColNo());
+ for (;;) {
+ int c = nextChar();
- AtLineStart = 0;
+ switch (c) {
+ /* Ignore whitespace and comments */
-scanagain:
- while (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
- linestart = 0;
- pLexBuffer++;
- }
+ case '*':
+ if (!lexerState->atLineStart)
+ return T_OP_MUL;
+ warning(WARNING_OBSOLETE,
+ "'*' is deprecated for comments, please use ';' instead\n");
+ /* fallthrough */
+ case ';':
+ discardComment();
+ /* fallthrough */
+ case ' ':
+ case '\t':
+ break;
- if (*pLexBuffer == 0) {
- // Reached the end of a file, macro, or rept.
- if (yywrap() == 0) {
- linestart = AtLineStart;
- AtLineStart = 0;
- goto scanagain;
- }
- }
+ /* Handle unambiguous single-char tokens */
- /* Check for line continuation character */
- if (*pLexBuffer == '\\') {
- /*
- * Look for line continuation character after a series of
- * spaces. This is also useful for files that use Windows line
- * endings: "\r\n" is replaced by " \n" before the lexer has the
- * opportunity to see it.
- */
- if (pLexBuffer[1] == ' ' || pLexBuffer[1] == '\t') {
- pLexBuffer += 2;
- while (1) {
- if (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
- pLexBuffer++;
- } else if (*pLexBuffer == '\n') {
- pLexBuffer++;
- nLineNo++;
- goto scanagain;
- } else {
- error("Expected a new line after the continuation character.\n");
- pLexBuffer++;
+ case '^':
+ return T_OP_XOR;
+ case '+':
+ return T_OP_ADD;
+ case '-':
+ return T_OP_SUB;
+ case '/':
+ return T_OP_DIV;
+ case '~':
+ return T_OP_NOT;
+
+ case '@':
+ yylval.tzSym[0] = '@';
+ yylval.tzSym[1] = '\0';
+ return T_ID;
+
+ /* Handle accepted single chars */
+
+ case '[':
+ case ']':
+ case '(':
+ case ')':
+ case ',':
+ case ':':
+ return c;
+
+ /* Handle ambiguous 1- or 2-char tokens */
+ char secondChar;
+
+ case '|': /* Either binary or logical OR */
+ secondChar = peek(0);
+ if (secondChar == '|') {
+ shiftChars(1);
+ return T_OP_LOGICOR;
+ }
+ return T_OP_OR;
+
+ case '=': /* Either SET alias, or EQ */
+ secondChar = peek(0);
+ if (secondChar == '=') {
+ shiftChars(1);
+ return T_OP_LOGICEQU;
+ }
+ return T_POP_EQUAL;
+
+ case '<': /* Either a LT, LTE, or left shift */
+ secondChar = peek(0);
+ if (secondChar == '=') {
+ shiftChars(1);
+ return T_OP_LOGICLE;
+ } else if (secondChar == '<') {
+ shiftChars(1);
+ return T_OP_SHL;
+ }
+ return T_OP_LOGICLT;
+
+ case '>': /* Either a GT, GTE, or right shift */
+ secondChar = peek(0);
+ if (secondChar == '=') {
+ shiftChars(1);
+ return T_OP_LOGICGE;
+ } else if (secondChar == '>') {
+ shiftChars(1);
+ return T_OP_SHR;
+ }
+ return T_OP_LOGICGT;
+
+ case '!': /* Either a NEQ, or negation */
+ secondChar = peek(0);
+ if (secondChar == '=') {
+ shiftChars(1);
+ return T_OP_LOGICNE;
+ }
+ return T_OP_LOGICNOT;
+
+ /* Handle numbers */
+
+ case '$':
+ yylval.nConstValue = 0;
+ readHexNumber();
+ /* Attempt to match `$ff00+c` */
+ if (yylval.nConstValue == 0xff00) {
+ /* Whitespace is ignored anyways */
+ while (isWhitespace(c = peek(0)))
+ shiftChars(1);
+ if (c == '+') {
+ /* FIXME: not great due to large lookahead */
+ uint8_t distance = 1;
+
+ do {
+ c = peek(distance++);
+ } while (isWhitespace(c));
+
+ if (c == 'c' || c == 'C') {
+ shiftChars(distance);
+ return T_MODE_HW_C;
+ }
}
}
- }
+ return T_NUMBER;
- /* Line continuation character */
- if (pLexBuffer[1] == '\n') {
- pLexBuffer += 2;
- nLineNo++;
- goto scanagain;
- }
+ case '0': /* Decimal number */
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ readNumber(10, c - '0');
+ if (peek(0) == '.') {
+ shiftChars(1);
+ readFractionalPart();
+ }
+ return T_NUMBER;
- /*
- * If there isn't a newline character or a space, ignore the
- * character '\'. It will eventually be handled by other
- * functions like PutMacroArg().
- */
- }
+ case '&':
+ secondChar = peek(0);
+ if (secondChar == '&') {
+ shiftChars(1);
+ return T_OP_LOGICAND;
+ } else if (secondChar >= '0' && secondChar <= '7') {
+ readNumber(8, 0);
+ return T_NUMBER;
+ }
+ return T_OP_AND;
- /*
- * Try to match an identifier, macro argument (e.g. \1),
- * or numeric literal.
- */
- yylex_GetFloatMaskAndFloatLen(&nFloatMask, &nFloatLen);
+ case '%': /* Either a modulo, or a binary constant */
+ secondChar = peek(0);
+ if (secondChar != binDigits[0] && secondChar != binDigits[1])
+ return T_OP_MOD;
- /* Try to match a keyword or operator. */
- pLongestFixed = yylex_GetLongestFixed();
+ yylval.nConstValue = 0;
+ readBinaryNumber();
+ return T_NUMBER;
- if (nFloatLen == 0 && pLongestFixed == NULL) {
- /*
- * No keyword, identifier, operator, or numerical literal
- * matches.
- */
+ case '`': /* Gfx constant */
+ readGfxConstant();
+ return T_NUMBER;
- if (*pLexBuffer == '"') {
- pLexBuffer++;
- yylex_ReadQuotedString();
+ /* Handle strings */
+
+ case '"':
+ readString();
return T_STRING;
- } else if (*pLexBuffer == '{') {
- pLexBuffer++;
- size_t len = yylex_ReadBracketedSymbol(yylval.tzString,
- 0);
- yylval.tzString[len] = 0;
- return T_STRING;
- }
- /*
- * It's not a keyword, operator, identifier, macro argument,
- * numeric literal, string, or bracketed symbol, so just return
- * the ASCII character.
- */
- unsigned char ch = *pLexBuffer++;
+ /* Handle newlines and EOF */
- if (ch == '\n')
- AtLineStart = 1;
+ case '\r':
+ return '\r';
+ case '\n':
+ return '\n';
- /*
- * Check for invalid unprintable characters.
- * They may not be readily apparent in a text editor,
- * so this is useful for identifying encoding problems.
- */
- if (ch != 0
- && ch != '\n'
- && !(ch >= 0x20 && ch <= 0x7E))
- fatalerror("Found garbage character: 0x%02X\n", ch);
+ case EOF:
+ return 0;
- return ch;
- }
+ /* Handle escapes */
- if (pLongestFixed == NULL || nFloatLen > pLongestFixed->nNameLength) {
- /*
- * Longest match was an identifier, macro argument, or numeric
- * literal.
- */
- struct sLexFloat *token = lexgetfloat(nFloatMask);
+ case '\\':
+ c = peek(0);
- if (token->Callback) {
- int32_t done = token->Callback(pLexBuffer, nFloatLen);
+ switch (c) {
+ case ' ':
+ case '\r':
+ case '\n':
+ readLineContinuation();
+ break;
- if (!done)
- goto scanagain;
- }
+ case EOF:
+ error("Illegal character escape at end of input\n");
+ break;
+ default:
+ shiftChars(1);
+ error("Illegal character escape '%s'\n", print(c));
+ }
+ break;
- uint32_t type = token->nToken;
+ /* Handle identifiers and escapes... or error out */
- if (type == T_ID && strchr(yylval.tzSym, '.'))
- type = T_LOCAL_ID;
+ default:
+ if (startsIdentifier(c)) {
+ int tokenType = readIdentifier(c);
- if (linestart && type == T_ID)
- return T_LABEL;
- return type;
- }
+ /* If a keyword, don't try to expand */
+ if (tokenType != T_ID && tokenType != T_LOCAL_ID)
+ return tokenType;
- /* Longest match was a keyword or operator. */
- pLexBuffer += pLongestFixed->nNameLength;
- yylval.nConstValue = pLongestFixed->nToken;
- return pLongestFixed->nToken;
+ if (lexerState->expandStrings) {
+ /* Attempt string expansion */
+ struct Symbol const *sym = sym_FindSymbol(yylval.tzSym);
+
+ if (sym && sym->type == SYM_EQUS) {
+ char const *s = sym_GetStringValue(sym);
+
+ beginExpansion(0, 0, s, strlen(s), sym->name);
+ continue; /* Restart, reading from the new buffer */
+ }
+ }
+
+ if (tokenType == T_ID && lexerState->atLineStart)
+ return T_LABEL;
+
+ return tokenType;
+ }
+
+ /* Do not report weird characters when capturing, it'll be done later */
+ if (!lexerState->capturing) {
+ /* TODO: try to group reportings */
+ error("Unknown character %s\n", reportGarbageChar(c));
+ }
+ }
+ lexerState->atLineStart = false;
+ }
}
-static uint32_t yylex_MACROARGS(void)
+static int yylex_RAW(void)
{
- size_t index = 0;
- size_t length, maxLength;
+ dbgPrint("Lexing in raw mode, line=%" PRIu32 ", col=%" PRIu32 "\n",
+ lexer_GetLineNo(), lexer_GetColNo());
- while ((*pLexBuffer == ' ') || (*pLexBuffer == '\t'))
- pLexBuffer++;
+ /* This is essentially a modified `readString` */
+ size_t i = 0;
+ bool insideString = false;
- while ((*pLexBuffer != ',') && (*pLexBuffer != '\n')) {
- char ch = *pLexBuffer++;
+ /* Trim left of string... */
+ while (isWhitespace(peek(0)))
+ shiftChars(1);
- if (ch == '\\') {
- ch = *pLexBuffer++;
+ for (;;) {
+ int c = peek(0);
- switch (ch) {
- case 'n':
- ch = '\n';
+ switch (c) {
+ case '"':
+ insideString = !insideString;
+ /* Other than that, just process quotes normally */
+ break;
+
+ case ';': /* Comments inside macro args */
+ if (insideString)
break;
- case 't':
- ch = '\t';
- break;
- case '\\':
- ch = '\\';
- break;
- case '"':
- ch = '\"';
- break;
+ discardComment();
+ c = peek(0);
+ /* fallthrough */
+ case ',':
+ case '\r':
+ case '\n':
+ case EOF:
+ if (i == sizeof(yylval.tzString)) {
+ i--;
+ warning(WARNING_LONG_STR, "Macro argument too long\n");
+ }
+ /* Trim whitespace */
+ while (i && isWhitespace(yylval.tzString[i - 1]))
+ i--;
+ /* Empty macro args break their expansion, so prevent that */
+ if (i == 0) {
+ /* Return the EOF token, and don't shift a non-existent char! */
+ if (c == EOF)
+ return 0;
+ shiftChars(1);
+ return c;
+ }
+ yylval.tzString[i] = '\0';
+ dbgPrint("Read raw string \"%s\"\n", yylval.tzString);
+ return T_STRING;
+
+ case '\\': /* Character escape */
+ c = peek(1);
+ switch (c) {
case ',':
- ch = ',';
+ shiftChars(1);
break;
- case '{':
- ch = '{';
- break;
- case '}':
- ch = '}';
- break;
+
case ' ':
- case '\t':
- /*
- * Look for line continuation character after a
- * series of spaces. This is also useful for
- * files that use Windows line endings: "\r\n"
- * is replaced by " \n" before the lexer has the
- * opportunity to see it.
- */
- while (1) {
- if (*pLexBuffer == ' '
- || *pLexBuffer == '\t') {
- pLexBuffer++;
- } else if (*pLexBuffer == '\n') {
- pLexBuffer++;
- nLineNo++;
- ch = 0;
- break;
- } else {
- error("Expected a new line after the continuation character.\n");
- }
- }
- break;
+ case '\r':
case '\n':
- /* Line continuation character */
- nLineNo++;
- ch = 0;
+ shiftChars(1); /* Shift the backslash */
+ readLineContinuation();
+ continue;
+
+ case EOF: /* Can't really print that one */
+ error("Illegal character escape at end of input\n");
+ c = '\\';
break;
- default:
- maxLength = MAXSTRLEN - index;
- length = CopyMacroArg(&yylval.tzString[index],
- maxLength, ch);
+ default: /* Pass the rest as-is */
+ c = '\\';
+ break;
+ }
+ break;
- if (length != 0)
- index += length;
- else
- fatalerror("Illegal character escape '%c'\n", ch);
+ case '{': /* Symbol interpolation */
+ shiftChars(1);
+ char const *ptr = readInterpolation();
- ch = 0;
- break;
+ if (ptr) {
+ while (*ptr) {
+ if (i == sizeof(yylval.tzString))
+ break;
+ yylval.tzString[i++] = *ptr++;
+ }
}
- } else if (ch == '{') {
- index += yylex_ReadBracketedSymbol(yylval.tzString,
- index);
- ch = 0;
+ continue; /* Do not copy an additional character */
+
+ /* Regular characters will just get copied */
}
- if (ch)
- yylex_StringWriteChar(yylval.tzString, index++, ch);
+ if (i < sizeof(yylval.tzString)) /* Copy one extra to flag overflow */
+ yylval.tzString[i++] = c;
+ shiftChars(1);
}
+}
- if (index) {
- yylex_StringWriteChar(yylval.tzString, index, 0);
+/*
+ * This function uses the fact that `if`, etc. constructs are only valid when
+ * there's nothing before them on their lines. This enables filtering
+ * "meaningful" (= at line start) vs. "meaningless" (everything else) tokens.
+ * It's especially important due to macro args not being handled in this
+ * state, and lexing them in "normal" mode potentially producing such tokens.
+ */
+static int skipIfBlock(bool toEndc)
+{
+ dbgPrint("Skipping IF block (toEndc = %s)\n", toEndc ? "true" : "false");
+ lexer_SetMode(LEXER_NORMAL);
+ int startingDepth = nIFDepth;
+ int token;
+ bool atLineStart = lexerState->atLineStart;
- /* trim trailing white space at the end of the line */
- if (*pLexBuffer == '\n')
- yylex_TrimEnd(yylval.tzString, index);
+ /* Prevent expanding macro args in this state */
+ lexerState->disableMacroArgs = true;
- return T_STRING;
- } else if (*pLexBuffer == '\n') {
- pLexBuffer++;
- AtLineStart = 1;
- return '\n';
- } else if (*pLexBuffer == ',') {
- pLexBuffer++;
- return ',';
+ for (;;) {
+ if (atLineStart) {
+ int c;
+
+ for (;;) {
+ c = peek(0);
+ if (!isWhitespace(c))
+ break;
+ shiftChars(1);
+ }
+
+ if (startsIdentifier(c)) {
+ shiftChars(1);
+ token = readIdentifier(c);
+ switch (token) {
+ case T_POP_IF:
+ nIFDepth++;
+ break;
+
+ case T_POP_ELIF:
+ case T_POP_ELSE:
+ if (toEndc) /* Ignore ELIF and ELSE, go to ENDC */
+ break;
+ /* fallthrough */
+ case T_POP_ENDC:
+ if (nIFDepth == startingDepth)
+ goto finish;
+ if (token == T_POP_ENDC)
+ nIFDepth--;
+ }
+ }
+ atLineStart = false;
+ }
+
+ /* Read chars until EOL */
+ do {
+ int c = nextChar();
+
+ if (c == EOF) {
+ token = 0;
+ goto finish;
+ } else if (c == '\\') {
+ /* Unconditionally skip the next char, including line conts */
+ c = nextChar();
+ } else if (c == '\r' || c == '\n') {
+ atLineStart = true;
+ }
+
+ if (c == '\r' || c == '\n')
+ /* Do this both on line continuations and plain EOLs */
+ lexerState->lineNo++;
+ /* Handle CRLF */
+ if (c == '\r' && peek(0) == '\n')
+ shiftChars(1);
+ } while (!atLineStart);
}
+finish:
- fatalerror("Internal error in %s\n", __func__);
+ lexerState->disableMacroArgs = false;
+ lexerState->atLineStart = false;
+
+ return token;
}
+static int yylex_SKIP_TO_ELIF(void)
+{
+ return skipIfBlock(false);
+}
+
+static int yylex_SKIP_TO_ENDC(void)
+{
+ return skipIfBlock(true);
+}
+
int yylex(void)
{
- int returnedChar;
+restart:
+ if (lexerState->atLineStart && lexerStateEOL) {
+ lexer_SetState(lexerStateEOL);
+ lexerStateEOL = NULL;
+ }
+ if (lexerState->atLineStart) {
+ /* Newlines read within an expansion should not increase the line count */
+ if (!lexerState->expansions || lexerState->expansions->distance) {
+ lexerState->lineNo++;
+ lexerState->colNo = 0;
+ }
+ }
- switch (lexerstate) {
- case LEX_STATE_NORMAL:
- returnedChar = yylex_NORMAL();
- break;
- case LEX_STATE_MACROARGS:
- returnedChar = yylex_MACROARGS();
- break;
- default:
- fatalerror("%s: Internal error.\n", __func__);
+ static int (* const lexerModeFuncs[])(void) = {
+ [LEXER_NORMAL] = yylex_NORMAL,
+ [LEXER_RAW] = yylex_RAW,
+ [LEXER_SKIP_TO_ELIF] = yylex_SKIP_TO_ELIF,
+ [LEXER_SKIP_TO_ENDC] = yylex_SKIP_TO_ENDC
+ };
+ int token = lexerModeFuncs[lexerState->mode]();
+
+ /* Make sure to terminate files with a line feed */
+ if (token == 0) {
+ if (lexerState->lastToken != '\n') {
+ dbgPrint("Forcing EOL at EOF\n");
+ token = '\n';
+ } else { /* Try to switch to new buffer; if it succeeds, scan again */
+ dbgPrint("Reached EOF!\n");
+ /* Captures end at their buffer's boundary no matter what */
+ if (!lexerState->capturing) {
+ if (!yywrap())
+ goto restart;
+ dbgPrint("Reached end of input.");
+ return 0;
+ }
+ }
+ } else if (token == '\r') { /* Handle CR and CRLF line endings */
+ token = '\n'; /* We universally use '\n' as the value for line ending tokens */
+ if (peek(0) == '\n')
+ shiftChars(1); /* Shift the CRLF's LF */
}
+ lexerState->lastToken = token;
- /* Check if string expansions were fully read */
- while (pCurrentStringExpansion
- && pCurrentStringExpansion->pBuffer == pLexBufferRealStart
- && pCurrentStringExpansion->pBufferPos <= pLexBuffer) {
- struct sStringExpansionPos *pParent =
- pCurrentStringExpansion->pParent;
- free(pCurrentStringExpansion->tzName);
- free(pCurrentStringExpansion);
+ lexerState->atLineStart = false;
+ if (token == '\n')
+ lexerState->atLineStart = true;
- pCurrentStringExpansion = pParent;
- nNbStringExpansions--;
+ return token;
+}
+
+static char *startCapture(void)
+{
+ assert(!lexerState->expansions);
+
+ lexerState->capturing = true;
+ lexerState->captureSize = 0;
+ lexerState->disableMacroArgs = true;
+
+ if (lexerState->isMmapped) {
+ return &lexerState->ptr[lexerState->offset];
+ } else {
+ lexerState->captureCapacity = 128; /* The initial size will be twice that */
+ reallocCaptureBuf();
+ return lexerState->captureBuf;
}
+}
- return returnedChar;
+void lexer_CaptureRept(char **capture, size_t *size)
+{
+ char *captureStart = startCapture();
+ unsigned int level = 0;
+ int c;
+
+ /*
+ * Due to parser internals, it reads the EOL after the expression before calling this.
+ * Thus, we don't need to keep one in the buffer afterwards.
+ * The following assertion checks that.
+ */
+ assert(lexerState->atLineStart);
+ for (;;) {
+ lexerState->lineNo++;
+ /* We're at line start, so attempt to match a `REPT` or `ENDR` token */
+ do { /* Discard initial whitespace */
+ c = nextChar();
+ } while (isWhitespace(c));
+ /* Now, try to match either `REPT` or `ENDR` as a **whole** identifier */
+ if (startsIdentifier(c)) {
+ switch (readIdentifier(c)) {
+ case T_POP_REPT:
+ level++;
+ /* Ignore the rest of that line */
+ break;
+
+ case T_POP_ENDR:
+ if (!level) {
+ /* Read (but don't capture) until EOL or EOF */
+ lexerState->capturing = false;
+ do {
+ c = nextChar();
+ } while (c != EOF && c != '\r' && c != '\n');
+ /* Handle Windows CRLF */
+ if (c == '\r' && peek(0) == '\n')
+ shiftChars(1);
+ goto finish;
+ }
+ level--;
+ }
+ }
+
+ /* Just consume characters until EOL or EOF */
+ for (;;) {
+ if (c == EOF) {
+ error("Unterminated REPT block\n");
+ lexerState->capturing = false;
+ goto finish;
+ } else if (c == '\n') {
+ break;
+ } else if (c == '\r') {
+ if (peek(0) == '\n')
+ shiftChars(1);
+ break;
+ }
+ c = nextChar();
+ }
+ }
+
+finish:
+ assert(!lexerState->capturing);
+ *capture = captureStart;
+ *size = lexerState->captureSize - strlen("ENDR");
+ lexerState->captureBuf = NULL;
+ lexerState->disableMacroArgs = false;
+}
+
+void lexer_CaptureMacroBody(char **capture, size_t *size)
+{
+ char *captureStart = startCapture();
+ unsigned int level = 0;
+ int c = peek(0);
+
+ /* If the file is `mmap`ed, we need not to unmap it to keep access to the macro */
+ if (lexerState->isMmapped)
+ lexerState->isReferenced = true;
+
+ /*
+ * Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling
+ * this. Thus, we need to keep one in the buffer afterwards.
+ * (Note that this also means the captured buffer begins with a newline and maybe comment)
+ * The following assertion checks that.
+ */
+ assert(!lexerState->atLineStart);
+ for (;;) {
+ /* Just consume characters until EOL or EOF */
+ for (;;) {
+ if (c == EOF) {
+ error("Unterminated macro definition\n");
+ lexerState->capturing = false;
+ goto finish;
+ } else if (c == '\n') {
+ break;
+ } else if (c == '\r') {
+ if (peek(0) == '\n')
+ shiftChars(1);
+ break;
+ }
+ c = nextChar();
+ }
+
+ /* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */
+ do { /* Discard initial whitespace */
+ c = nextChar();
+ } while (isWhitespace(c));
+ /* Now, try to match either `REPT` or `ENDR` as a **whole** identifier */
+ if (startsIdentifier(c)) {
+ switch (readIdentifier(c)) {
+ case T_ID:
+ /* We have an initial label, look for a single colon */
+ do {
+ c = nextChar();
+ } while (isWhitespace(c));
+ if (c != ':') /* If not a colon, give up */
+ break;
+ /* And finally, a `MACRO` token */
+ do {
+ c = nextChar();
+ } while (isWhitespace(c));
+ if (!startsIdentifier(c))
+ break;
+ if (readIdentifier(c) != T_POP_MACRO)
+ break;
+ level++;
+ break;
+
+ case T_POP_ENDM:
+ if (!level) {
+ /* Read (but don't capture) until EOL or EOF */
+ lexerState->capturing = false;
+ do {
+ c = peek(0);
+ if (c == EOF || c == '\r' || c == '\n')
+ break;
+ shiftChars(1);
+ } while (c != EOF && c != '\r' && c != '\n');
+ /* Handle Windows CRLF */
+ if (c == '\r' && peek(1) == '\n')
+ shiftChars(1);
+ goto finish;
+ }
+ level--;
+ }
+ }
+ lexerState->lineNo++;
+ }
+
+finish:
+ assert(!lexerState->capturing);
+ *capture = captureStart;
+ *size = lexerState->captureSize - strlen("ENDM");
+ lexerState->captureBuf = NULL;
+ lexerState->disableMacroArgs = false;
}
--- a/src/asm/macro.c
+++ b/src/asm/macro.c
@@ -29,7 +29,8 @@
sizeof(((struct MacroArgs){0}).args[0]) * (nbArgs))
static struct MacroArgs *macroArgs = NULL;
-static uint32_t uniqueID = -1;
+static uint32_t uniqueID = 0;
+static uint32_t maxUniqueID = 0;
/*
* The initialization is somewhat harmful, since it is never used, but it
* guarantees the size of the buffer will be correct. I was unable to find a
@@ -61,7 +62,7 @@
#define macArgs (*argPtr)
if (macArgs->nbArgs == MAXMACROARGS)
error("A maximum of " EXPAND_AND_STR(MAXMACROARGS)
- " arguments is allowed\n");
+ " arguments is allowed\n");
if (macArgs->nbArgs >= macArgs->capacity) {
macArgs->capacity *= 2;
/* Check that overflow didn't roll us back */
@@ -88,6 +89,9 @@
char const *macro_GetArg(uint32_t i)
{
+ if (!macroArgs)
+ return NULL;
+
uint32_t realIndex = i + macroArgs->shift - 1;
return realIndex >= macroArgs->nbArgs ? NULL
@@ -107,13 +111,21 @@
void macro_SetUniqueID(uint32_t id)
{
uniqueID = id;
- if (id == -1) {
+ if (id == 0) {
uniqueIDPtr = NULL;
} else {
+ if (uniqueID > maxUniqueID)
+ maxUniqueID = uniqueID;
/* The buffer is guaranteed to be the correct size */
sprintf(uniqueIDBuf, "_%" PRIu32, id);
uniqueIDPtr = uniqueIDBuf;
}
+}
+
+uint32_t macro_UseNewUniqueID(void)
+{
+ macro_SetUniqueID(++maxUniqueID);
+ return maxUniqueID;
}
void macro_ShiftCurrentArgs(void)
--- a/src/asm/main.c
+++ b/src/asm/main.c
@@ -6,6 +6,7 @@
* SPDX-License-Identifier: MIT
*/
+#include <ctype.h>
#include <errno.h>
#include <float.h>
#include <inttypes.h>
@@ -22,8 +23,10 @@
#include "asm/lexer.h"
#include "asm/main.h"
#include "asm/output.h"
+#include "asm/rpn.h"
#include "asm/symbol.h"
#include "asm/warning.h"
+#include "asmy.h"
#include "extern/err.h"
#include "extern/getopt.h"
@@ -31,8 +34,6 @@
#include "helpers.h"
#include "version.h"
-extern int yyparse(void);
-
size_t cldefines_index;
size_t cldefines_numindices;
size_t cldefines_bufsize;
@@ -41,11 +42,7 @@
clock_t nStartClock, nEndClock;
uint32_t nTotalLines, nIFDepth;
-bool skipElif;
-uint32_t unionStart[128], unionSize[128];
-int32_t nLineNo;
-
#if defined(YYDEBUG) && YYDEBUG
extern int yydebug;
#endif
@@ -74,66 +71,11 @@
struct sOptionStackEntry *pOptionStack;
-void opt_SetCurrentOptions(struct sOptions *pOpt)
+void opt_SetCurrentOptions(struct sOptions *opt)
{
- if (nGBGfxID != -1) {
- lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[0],
- CurrentOptions.gbgfx[0]);
- lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[1],
- CurrentOptions.gbgfx[1]);
- lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[2],
- CurrentOptions.gbgfx[2]);
- lex_FloatDeleteRange(nGBGfxID, CurrentOptions.gbgfx[3],
- CurrentOptions.gbgfx[3]);
- lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[0],
- CurrentOptions.gbgfx[0]);
- lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[1],
- CurrentOptions.gbgfx[1]);
- lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[2],
- CurrentOptions.gbgfx[2]);
- lex_FloatDeleteSecondRange(nGBGfxID, CurrentOptions.gbgfx[3],
- CurrentOptions.gbgfx[3]);
- }
- if (nBinaryID != -1) {
- lex_FloatDeleteRange(nBinaryID, CurrentOptions.binary[0],
- CurrentOptions.binary[0]);
- lex_FloatDeleteRange(nBinaryID, CurrentOptions.binary[1],
- CurrentOptions.binary[1]);
- lex_FloatDeleteSecondRange(nBinaryID, CurrentOptions.binary[0],
- CurrentOptions.binary[0]);
- lex_FloatDeleteSecondRange(nBinaryID, CurrentOptions.binary[1],
- CurrentOptions.binary[1]);
- }
- CurrentOptions = *pOpt;
-
- if (nGBGfxID != -1) {
- lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[0],
- CurrentOptions.gbgfx[0]);
- lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[1],
- CurrentOptions.gbgfx[1]);
- lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[2],
- CurrentOptions.gbgfx[2]);
- lex_FloatAddRange(nGBGfxID, CurrentOptions.gbgfx[3],
- CurrentOptions.gbgfx[3]);
- lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[0],
- CurrentOptions.gbgfx[0]);
- lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[1],
- CurrentOptions.gbgfx[1]);
- lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[2],
- CurrentOptions.gbgfx[2]);
- lex_FloatAddSecondRange(nGBGfxID, CurrentOptions.gbgfx[3],
- CurrentOptions.gbgfx[3]);
- }
- if (nBinaryID != -1) {
- lex_FloatAddRange(nBinaryID, CurrentOptions.binary[0],
- CurrentOptions.binary[0]);
- lex_FloatAddRange(nBinaryID, CurrentOptions.binary[1],
- CurrentOptions.binary[1]);
- lex_FloatAddSecondRange(nBinaryID, CurrentOptions.binary[0],
- CurrentOptions.binary[0]);
- lex_FloatAddSecondRange(nBinaryID, CurrentOptions.binary[1],
- CurrentOptions.binary[1]);
- }
+ CurrentOptions = *opt;
+ lexer_SetGfxDigits(CurrentOptions.gbgfx);
+ lexer_SetBinDigits(CurrentOptions.binary);
}
void opt_Parse(char *s)
@@ -251,6 +193,22 @@
sym_AddString(cldefines[i], cldefines[i + 1]);
}
+void upperstring(char *s)
+{
+ while (*s) {
+ *s = toupper(*s);
+ s++;
+ }
+}
+
+void lowerstring(char *s)
+{
+ while (*s) {
+ *s = tolower(*s);
+ s++;
+ }
+}
+
/* Escapes Make-special chars from a string */
static char *make_escape(const char *str)
{
@@ -350,11 +308,11 @@
yydebug = 1;
#endif
- nMaxRecursionDepth = 64;
oGeneratePhonyDeps = false;
oGeneratedMissingIncludes = false;
oFailedOnMissingInclude = false;
tzTargetFileName = NULL;
+ uint32_t maxRecursionDepth = 64;
size_t nTargetFileNameLen = 0;
DefaultOptions.gbgfx[0] = '0';
@@ -433,7 +391,7 @@
break;
case 'r':
- nMaxRecursionDepth = strtoul(optarg, &ep, 0);
+ maxRecursionDepth = strtoul(optarg, &ep, 0);
if (optarg[0] == '\0' || *ep != '\0')
errx(1, "Invalid argument for option 'r'");
@@ -516,8 +474,6 @@
tzMainfile = argv[argc - 1];
- setup_lexer();
-
if (verbose)
printf("Assembling %s\n", tzMainfile);
@@ -528,19 +484,20 @@
fprintf(dependfile, "%s: %s\n", tzTargetFileName, tzMainfile);
}
+ /* Init file stack; important to do first, since it provides the file name, line, etc */
+ lexer_Init();
+ fstk_Init(tzMainfile, maxRecursionDepth);
+
nStartClock = clock();
- nLineNo = 1;
nTotalLines = 0;
nIFDepth = 0;
- skipElif = true;
sym_Init();
sym_SetExportAll(exportall);
- fstk_Init(tzMainfile);
+
opt_ParseDefines();
charmap_New("main", NULL);
- yy_set_state(LEX_STATE_NORMAL);
opt_SetCurrentOptions(&DefaultOptions);
if (yyparse() != 0 || nbErrors != 0)
--- a/src/asm/output.c
+++ b/src/asm/output.c
@@ -12,6 +12,7 @@
#include <assert.h>
#include <errno.h>
+#include <inttypes.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
@@ -33,7 +34,8 @@
#include "platform.h" // strdup
struct Patch {
- char tzFilename[_MAX_PATH + 1];
+ struct FileStackNode const *src;
+ uint32_t lineNo;
uint32_t nOffset;
struct Section *pcSection;
uint32_t pcOffset;
@@ -62,19 +64,17 @@
static struct Assertion *assertions = NULL;
+static struct FileStackNode *fileStackNodes = NULL;
+
/*
* Count the number of sections used in this object
*/
static uint32_t countsections(void)
{
- struct Section *sect;
uint32_t count = 0;
- sect = pSectionList;
- while (sect) {
+ for (struct Section const *sect = pSectionList; sect; sect = sect->next)
count++;
- sect = sect->next;
- }
return count;
}
@@ -129,16 +129,60 @@
fputc(0, f);
}
+static uint32_t getNbFileStackNodes(void)
+{
+ return fileStackNodes ? fileStackNodes->ID + 1 : 0;
+}
+
+void out_RegisterNode(struct FileStackNode *node)
+{
+ /* If node is not already registered, register it (and parents), and give it a unique ID */
+ while (node->ID == -1) {
+ node->ID = getNbFileStackNodes();
+ if (node->ID == -1)
+ fatalerror("Reached too many file stack nodes; try splitting the file up\n");
+ node->next = fileStackNodes;
+ fileStackNodes = node;
+
+ /* Also register the node's parents */
+ node = node->parent;
+ if (!node)
+ break;
+ }
+}
+
+void out_ReplaceNode(struct FileStackNode *node)
+{
+ (void)node;
+#if 0
+This is code intended to replace a node, which is pretty useless until ref counting is added...
+
+ struct FileStackNode **ptr = &fileStackNodes;
+
+ /*
+ * The linked list is supposed to have decrementing IDs, so iterate with less memory reads,
+ * to hopefully hit the cache less. A debug check is added after, in case a change is made
+ * that breaks this assumption.
+ */
+ for (uint32_t i = fileStackNodes->ID; i != node->ID; i--)
+ ptr = &(*ptr)->next;
+ assert((*ptr)->ID == node->ID);
+
+ node->next = (*ptr)->next;
+ assert(!node->next || node->next->ID == node->ID - 1); /* Catch inconsistencies early */
+ /* TODO: unreference the node */
+ *ptr = node;
+#endif
+}
+
/*
* Return a section's ID
*/
static uint32_t getsectid(struct Section const *sect)
{
- struct Section const *sec;
+ struct Section const *sec = pSectionList;
uint32_t ID = 0;
- sec = pSectionList;
-
while (sec) {
if (sec == sect)
return ID;
@@ -159,7 +203,10 @@
*/
static void writepatch(struct Patch const *patch, FILE *f)
{
- fputstring(patch->tzFilename, f);
+ assert(patch->src->ID != -1);
+
+ fputlong(patch->src->ID, f);
+ fputlong(patch->lineNo, f);
fputlong(patch->nOffset, f);
fputlong(getSectIDIfAny(patch->pcSection), f);
fputlong(patch->pcOffset, f);
@@ -206,8 +253,10 @@
if (!sym_IsDefined(sym)) {
fputc(SYMTYPE_IMPORT, f);
} else {
+ assert(sym->src->ID != -1);
+
fputc(sym->isExported ? SYMTYPE_EXPORT : SYMTYPE_LOCAL, f);
- fputstring(sym->fileName, f);
+ fputlong(sym->src->ID, f);
fputlong(sym->fileLine, f);
fputlong(getSectIDIfAny(sym_GetSection(sym)), f);
fputlong(sym->value, f);
@@ -214,6 +263,17 @@
}
}
+static void registerSymbol(struct Symbol *sym)
+{
+ *objectSymbolsTail = sym;
+ objectSymbolsTail = &sym->next;
+ out_RegisterNode(sym->src);
+ if (nbSymbols == -1)
+ fatalerror("Registered too many symbols (%" PRIu32
+ "); try splitting up your files\n", (uint32_t)-1);
+ sym->ID = nbSymbols++;
+}
+
/*
* Returns a symbol's ID within the object file
* If the symbol does not have one, one is assigned by registering the symbol
@@ -220,12 +280,8 @@
*/
static uint32_t getSymbolID(struct Symbol *sym)
{
- if (sym->ID == -1) {
- sym->ID = nbSymbols++;
-
- *objectSymbolsTail = sym;
- objectSymbolsTail = &sym->next;
- }
+ if (sym->ID == -1 && !sym_IsPC(sym))
+ registerSymbol(sym);
return sym->ID;
}
@@ -303,22 +359,25 @@
/*
* Allocate a new patch structure and link it into the list
+ * WARNING: all patches are assumed to eventually be written, so the file stack node is registered
*/
-static struct Patch *allocpatch(uint32_t type, struct Expression const *expr,
- uint32_t ofs)
+static struct Patch *allocpatch(uint32_t type, struct Expression const *expr, uint32_t ofs)
{
struct Patch *patch = malloc(sizeof(struct Patch));
uint32_t rpnSize = expr->isKnown ? 5 : expr->nRPNPatchSize;
+ struct FileStackNode *node = fstk_GetFileStack();
if (!patch)
fatalerror("No memory for patch: %s\n", strerror(errno));
- patch->pRPN = malloc(sizeof(*patch->pRPN) * rpnSize);
+ patch->pRPN = malloc(sizeof(*patch->pRPN) * rpnSize);
if (!patch->pRPN)
fatalerror("No memory for patch's RPN expression: %s\n", strerror(errno));
patch->type = type;
- fstk_DumpToStr(patch->tzFilename, sizeof(patch->tzFilename));
+ patch->src = node;
+ out_RegisterNode(node);
+ patch->lineNo = lexer_GetLineNo();
patch->nOffset = ofs;
patch->pcSection = sect_GetSymbolSection();
patch->pcOffset = sect_GetSymbolOffset();
@@ -382,13 +441,28 @@
fputstring(assert->message, f);
}
+static void writeFileStackNode(struct FileStackNode const *node, FILE *f)
+{
+ fputlong(node->parent ? node->parent->ID : -1, f);
+ fputlong(node->lineNo, f);
+ fputc(node->type, f);
+ if (node->type != NODE_REPT) {
+ fputstring(((struct FileStackNamedNode const *)node)->name, f);
+ } else {
+ struct FileStackReptNode const *reptNode = (struct FileStackReptNode const *)node;
+
+ fputlong(reptNode->reptDepth, f);
+ /* Iters are stored by decreasing depth, so reverse the order for output */
+ for (uint32_t i = reptNode->reptDepth; i--; )
+ fputlong(reptNode->iters[i], f);
+ }
+}
+
static void registerExportedSymbol(struct Symbol *symbol, void *arg)
{
(void)arg;
if (sym_IsExported(symbol) && symbol->ID == -1) {
- *objectSymbolsTail = symbol;
- objectSymbolsTail = &symbol->next;
- nbSymbols++;
+ registerSymbol(symbol);
}
}
@@ -410,6 +484,15 @@
fputlong(nbSymbols, f);
fputlong(countsections(), f);
+
+ fputlong(getNbFileStackNodes(), f);
+ for (struct FileStackNode const *node = fileStackNodes; node; node = node->next) {
+ writeFileStackNode(node, f);
+ if (node->next && node->next->ID != node->ID - 1)
+ fatalerror("Internal error: fstack node #%" PRIu32 " follows #%" PRIu32
+ ". Please report this to the developers!\n",
+ node->next->ID, node->ID);
+ }
for (struct Symbol const *sym = objectSymbols; sym; sym = sym->next)
writesymbol(sym, f);
--- a/src/asm/rpn.c
+++ b/src/asm/rpn.c
@@ -258,8 +258,8 @@
if (amount >= 0) {
// Left shift
if (amount >= 32) {
- warning(WARNING_SHIFT_AMOUNT, "Shifting left by large amount %" PRId32 "\n",
- amount);
+ warning(WARNING_SHIFT_AMOUNT, "Shifting left by large amount %"
+ PRId32 "\n", amount);
return 0;
} else {
--- a/src/asm/section.c
+++ b/src/asm/section.c
@@ -656,9 +656,15 @@
startPos = 0;
}
- FILE *f = fstk_FindFile(s, NULL);
+ char *fullPath = NULL;
+ size_t size = 0;
+ FILE *f = NULL;
+ if (fstk_FindFile(s, &fullPath, &size))
+ f = fopen(fullPath, "rb");
+
if (!f) {
+ free(fullPath);
if (oGeneratedMissingIncludes) {
oFailedOnMissingInclude = true;
return;
@@ -699,6 +705,7 @@
error("Error reading INCBIN file '%s': %s\n", s, strerror(errno));
fclose(f);
+ free(fullPath);
}
void out_BinaryFileSlice(char const *s, int32_t start_pos, int32_t length)
@@ -715,9 +722,15 @@
if (length == 0) /* Don't even bother with 0-byte slices */
return;
- FILE *f = fstk_FindFile(s, NULL);
+ char *fullPath = NULL;
+ size_t size = 0;
+ FILE *f = NULL;
+ if (fstk_FindFile(s, &fullPath, &size))
+ f = fopen(fullPath, "rb");
+
if (!f) {
+ free(fullPath);
if (oGeneratedMissingIncludes) {
oFailedOnMissingInclude = true;
return;
@@ -767,6 +780,7 @@
}
fclose(f);
+ free(fullPath);
}
/*
--- a/src/asm/symbol.c
+++ b/src/asm/symbol.c
@@ -23,6 +23,7 @@
#include "asm/macro.h"
#include "asm/main.h"
#include "asm/mymath.h"
+#include "asm/output.h"
#include "asm/section.h"
#include "asm/symbol.h"
#include "asm/util.h"
@@ -77,14 +78,57 @@
static int32_t Callback_NARG(void)
{
+ if (!macro_GetCurrentArgs()) {
+ error("_NARG does not make sense outside of a macro\n");
+ return 0;
+ }
return macro_NbArgs();
}
static int32_t Callback__LINE__(void)
{
- return nLineNo;
+ return lexer_GetLineNo();
}
+static char const *Callback__FILE__(void)
+{
+ /*
+ * FIXME: this is dangerous, and here's why this is CURRENTLY okay. It's still bad, fix it.
+ * There are only two call sites for this; one copies the contents directly, the other is
+ * EQUS expansions, which cannot straddle file boundaries. So this should be fine.
+ */
+ static char *buf = NULL;
+ static size_t bufsize = 0;
+ char const *fileName = fstk_GetFileName();
+ size_t j = 1;
+
+ /* TODO: is there a way for a file name to be empty? */
+ assert(fileName[0]);
+ /* The assertion above ensures the loop runs at least once */
+ for (size_t i = 0; fileName[i]; i++, j++) {
+ /* Account for the extra backslash inserted below */
+ if (fileName[i] == '"')
+ j++;
+ /* Ensure there will be enough room; DO NOT PRINT ANYTHING ABOVE THIS!! */
+ if (j + 2 >= bufsize) { /* Always keep room for 2 tail chars */
+ bufsize = bufsize ? bufsize * 2 : 64;
+ buf = realloc(buf, bufsize);
+ if (!buf)
+ fatalerror("Failed to grow buffer for file name: %s\n",
+ strerror(errno));
+ }
+ /* Escape quotes, since we're returning a string */
+ if (fileName[i] == '"')
+ buf[j - 1] = '\\';
+ buf[j] = fileName[i];
+ }
+ /* Write everything after the loop, to ensure the buffer has been allocated */
+ buf[0] = '"';
+ buf[j++] = '"';
+ buf[j] = '\0';
+ return buf;
+}
+
static int32_t CallbackPC(void)
{
struct Section const *section = sect_GetSymbolSection();
@@ -97,8 +141,8 @@
*/
int32_t sym_GetValue(struct Symbol const *sym)
{
- if (sym_IsNumeric(sym) && sym->callback)
- return sym->callback();
+ if (sym_IsNumeric(sym) && sym->hasCallback)
+ return sym->numCallback();
if (sym->type == SYM_LABEL)
/* TODO: do not use section's org directly */
@@ -107,15 +151,35 @@
return sym->value;
}
+static void dumpFilename(struct Symbol const *sym)
+{
+ if (!sym->src)
+ fputs("<builtin>", stderr);
+ else
+ fstk_Dump(sym->src, sym->fileLine);
+}
+
/*
+ * Set a symbol's definition filename and line
+ */
+static void setSymbolFilename(struct Symbol *sym)
+{
+ sym->src = fstk_GetFileStack();
+ sym->fileLine = lexer_GetLineNo();
+}
+
+/*
* Update a symbol's definition filename and line
*/
static void updateSymbolFilename(struct Symbol *sym)
{
- if (snprintf(sym->fileName, _MAX_PATH + 1, "%s",
- tzCurrentFileName) > _MAX_PATH)
- fatalerror("%s: File name is too long: '%s'\n", __func__, tzCurrentFileName);
- sym->fileLine = fstk_GetLine();
+ struct FileStackNode *oldSrc = sym->src;
+
+ setSymbolFilename(sym);
+ /* If the old node was referenced, ensure the new one is */
+ if (oldSrc->referenced && oldSrc->ID != -1)
+ out_RegisterNode(sym->src);
+ /* TODO: unref the old node, and use `out_ReplaceNode` instead if deleting it */
}
/*
@@ -133,8 +197,9 @@
symbol->isExported = false;
symbol->isBuiltin = false;
+ symbol->hasCallback = false;
symbol->section = NULL;
- updateSymbolFilename(symbol);
+ setSymbolFilename(symbol);
symbol->ID = -1;
symbol->next = NULL;
@@ -209,8 +274,7 @@
labelScope = NULL;
hash_RemoveElement(symbols, symbol->name);
- if (symbol->type == SYM_MACRO)
- free(symbol->macro);
+ /* TODO: ideally, also unref the file stack nodes */
free(symbol);
}
}
@@ -229,8 +293,23 @@
}
/*
- * Return a constant symbols value
+ * Return a constant symbol's value, assuming it's defined
*/
+uint32_t sym_GetConstantSymValue(struct Symbol const *sym)
+{
+ if (sym == PCSymbol)
+ return sym_GetPCValue();
+ else if (!sym_IsConstant(sym))
+ error("\"%s\" does not have a constant value\n", sym->name);
+ else
+ return sym_GetValue(sym);
+
+ return 0;
+}
+
+/*
+ * Return a constant symbol's value
+ */
uint32_t sym_GetConstantValue(char const *s)
{
struct Symbol const *sym = sym_FindSymbol(s);
@@ -237,12 +316,8 @@
if (sym == NULL)
error("'%s' not defined\n", s);
- else if (sym == PCSymbol)
- return sym_GetPCValue();
- else if (!sym_IsConstant(sym))
- error("\"%s\" does not have a constant value\n", s);
else
- return sym_GetValue(sym);
+ return sym_GetConstantSymValue(sym);
return 0;
}
@@ -285,9 +360,11 @@
if (!symbol)
symbol = createsymbol(symbolName);
- else if (sym_IsDefined(symbol))
- error("'%s' already defined at %s(%" PRIu32 ")\n", symbolName,
- symbol->fileName, symbol->fileLine);
+ else if (sym_IsDefined(symbol)) {
+ error("'%s' already defined at ", symbolName);
+ dumpFilename(symbol);
+ putc('\n', stderr);
+ }
return symbol;
}
@@ -300,7 +377,6 @@
struct Symbol *sym = createNonrelocSymbol(symName);
sym->type = SYM_EQU;
- sym->callback = NULL;
sym->value = value;
return sym;
@@ -343,18 +419,19 @@
{
struct Symbol *sym = findsymbol(symName, NULL);
- if (sym == NULL)
+ if (sym == NULL) {
sym = createsymbol(symName);
- else if (sym_IsDefined(sym) && sym->type != SYM_SET)
- error("'%s' already defined as %s at %s(%" PRIu32 ")\n",
- symName, sym->type == SYM_LABEL ? "label" : "constant",
- sym->fileName, sym->fileLine);
- else
- /* TODO: can the scope be incorrect when talking over refs? */
+ } else if (sym_IsDefined(sym) && sym->type != SYM_SET) {
+ error("'%s' already defined as %s at ",
+ symName, sym->type == SYM_LABEL ? "label" : "constant");
+ dumpFilename(sym);
+ putc('\n', stderr);
+ } else {
+ /* TODO: can the scope be incorrect when taking over refs? */
updateSymbolFilename(sym);
+ }
sym->type = SYM_SET;
- sym->callback = NULL;
sym->value = value;
return sym;
@@ -365,7 +442,7 @@
* @param name The label's full name (so `.name` is invalid)
* @return The created symbol
*/
-static struct Symbol *addSectionlessLabel(char const *name)
+static struct Symbol *addLabel(char const *name)
{
assert(name[0] != '.'); /* The symbol name must have been expanded prior */
struct Symbol *sym = findsymbol(name, NULL); /* Due to this, don't look for expansions */
@@ -373,26 +450,20 @@
if (!sym) {
sym = createsymbol(name);
} else if (sym_IsDefined(sym)) {
- error("'%s' already defined in %s(%" PRIu32 ")\n",
- name, sym->fileName, sym->fileLine);
+ error("'%s' already defined at ", name);
+ dumpFilename(sym);
+ putc('\n', stderr);
return NULL;
+ } else {
+ updateSymbolFilename(sym);
}
/* If the symbol already exists as a ref, just "take over" it */
sym->type = SYM_LABEL;
- sym->callback = NULL;
sym->value = sect_GetSymbolOffset();
if (exportall)
sym->isExported = true;
sym->section = sect_GetSymbolSection();
- updateSymbolFilename(sym);
- return sym;
-}
-
-static struct Symbol *addLabel(char const *name)
-{
- struct Symbol *sym = addSectionlessLabel(name);
-
if (sym && !sym->section)
error("Label \"%s\" created outside of a SECTION\n", name);
return sym;
@@ -467,14 +538,14 @@
/*
* Add a macro definition
*/
-struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo)
+struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size)
{
struct Symbol *sym = createNonrelocSymbol(symName);
sym->type = SYM_MACRO;
- sym->macroSize = ulNewMacroSize;
- sym->macro = tzNewMacro;
- updateSymbolFilename(sym);
+ sym->macroSize = size;
+ sym->macro = body;
+ setSymbolFilename(sym); /* TODO: is this really necessary? */
/*
* The symbol is created at the line after the `endm`,
* override this with the actual definition line
@@ -528,21 +599,36 @@
return ptr;
}
+static inline struct Symbol *createBuiltinSymbol(char const *name)
+{
+ struct Symbol *sym = createsymbol(name);
+
+ sym->isBuiltin = true;
+ sym->hasCallback = true;
+ sym->src = NULL;
+ sym->fileLine = 0;
+ return sym;
+}
+
/*
* Initialize the symboltable
*/
void sym_Init(void)
{
- struct Symbol *_NARGSymbol = sym_AddEqu("_NARG", 0);
- struct Symbol *__LINE__Symbol = sym_AddEqu("__LINE__", 0);
+ PCSymbol = createBuiltinSymbol("@");
+ struct Symbol *_NARGSymbol = createBuiltinSymbol("_NARG");
+ struct Symbol *__LINE__Symbol = createBuiltinSymbol("__LINE__");
+ struct Symbol *__FILE__Symbol = createBuiltinSymbol("__FILE__");
- PCSymbol = addSectionlessLabel("@");
- PCSymbol->isBuiltin = true;
- PCSymbol->callback = CallbackPC;
- _NARGSymbol->isBuiltin = true;
- _NARGSymbol->callback = Callback_NARG;
- __LINE__Symbol->isBuiltin = true;
- __LINE__Symbol->callback = Callback__LINE__;
+ PCSymbol->type = SYM_LABEL;
+ PCSymbol->section = NULL;
+ PCSymbol->numCallback = CallbackPC;
+ _NARGSymbol->type = SYM_EQU;
+ _NARGSymbol->numCallback = Callback_NARG;
+ __LINE__Symbol->type = SYM_EQU;
+ __LINE__Symbol->numCallback = Callback__LINE__;
+ __FILE__Symbol->type = SYM_EQUS;
+ __FILE__Symbol->strCallback = Callback__FILE__;
sym_AddSet("_RS", 0)->isBuiltin = true;
sym_AddEqu("__RGBDS_MAJOR__", PACKAGE_VERSION_MAJOR)->isBuiltin = true;
--- a/src/asm/util.c
+++ b/src/asm/util.c
@@ -6,6 +6,7 @@
* SPDX-License-Identifier: MIT
*/
+#include <ctype.h>
#include <stdint.h>
#include "asm/main.h"
@@ -25,6 +26,40 @@
hash = (hash * 33) ^ (*s++);
return hash;
+}
+
+char const *print(int c)
+{
+ static char buf[5]; /* '\xNN' + '\0' */
+
+ if (c == EOF)
+ return "EOF";
+
+ if (isprint(c)) {
+ buf[0] = c;
+ buf[1] = '\0';
+ return buf;
+ }
+
+ buf[0] = '\\';
+ switch (c) {
+ case '\n':
+ buf[1] = 'n';
+ break;
+ case '\r':
+ buf[1] = 'r';
+ break;
+ case '\t':
+ buf[1] = 't';
+ break;
+
+ default: /* Print as hex */
+ buf[1] = 'x';
+ sprintf(&buf[2], "%02hhx", c);
+ return buf;
+ }
+ buf[2] = '\0';
+ return buf;
}
size_t readUTF8Char(uint8_t *dest, char const *src)
--- a/src/asm/warning.c
+++ b/src/asm/warning.c
@@ -198,14 +198,14 @@
warnx("Unknown warning `%s`", flag);
}
-void verror(const char *fmt, va_list args, char const *flag)
+void printDiag(const char *fmt, va_list args, char const *type,
+ char const *flagfmt, char const *flag)
{
- fputs("ERROR: ", stderr);
- fstk_Dump();
- fprintf(stderr, flag ? ": [-Werror=%s]\n " : ":\n ", flag);
+ fputs(type, stderr);
+ fstk_DumpCurrent();
+ fprintf(stderr, flagfmt, flag);
vfprintf(stderr, fmt, args);
- fstk_DumpStringExpansions();
- nbErrors++;
+ lexer_DumpStringExpansions();
}
void error(const char *fmt, ...)
@@ -213,8 +213,9 @@
va_list args;
va_start(args, fmt);
- verror(fmt, args, NULL);
+ printDiag(fmt, args, "ERROR: ", ":\n ", NULL);
va_end(args);
+ nbErrors++;
}
noreturn_ void fatalerror(const char *fmt, ...)
@@ -222,7 +223,7 @@
va_list args;
va_start(args, fmt);
- verror(fmt, args, NULL);
+ printDiag(fmt, args, "FATAL: ", ":\n ", NULL);
va_end(args);
exit(1);
@@ -240,7 +241,7 @@
return;
case WARNING_ERROR:
- verror(fmt, args, flag);
+ printDiag(fmt, args, "ERROR: ", ": [-Werror=%s]\n ", flag);
va_end(args);
return;
@@ -252,11 +253,7 @@
break;
}
- fputs("warning: ", stderr);
- fstk_Dump();
- fprintf(stderr, ": [-W%s]\n ", flag);
- vfprintf(stderr, fmt, args);
- fstk_DumpStringExpansions();
+ printDiag(fmt, args, "warning: ", ": [-W%s]\n ", flag);
va_end(args);
}
--- a/src/link/assign.c
+++ b/src/link/assign.c
@@ -81,14 +81,14 @@
/* Check if this doesn't conflict with what the code says */
if (section->isBankFixed && placement->bank != section->bank)
- error("Linker script contradicts \"%s\"'s bank placement",
+ error(NULL, 0, "Linker script contradicts \"%s\"'s bank placement",
section->name);
if (section->isAddressFixed && placement->org != section->org)
- error("Linker script contradicts \"%s\"'s address placement",
+ error(NULL, 0, "Linker script contradicts \"%s\"'s address placement",
section->name);
if (section->isAlignFixed
&& (placement->org & section->alignMask) != 0)
- error("Linker script contradicts \"%s\"'s alignment",
+ error(NULL, 0, "Linker script contradicts \"%s\"'s alignment",
section->name);
section->isAddressFixed = true;
--- a/src/link/main.c
+++ b/src/link/main.c
@@ -6,8 +6,10 @@
* SPDX-License-Identifier: MIT
*/
+#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
+#include <stdarg.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
@@ -39,25 +41,73 @@
static uint32_t nbErrors = 0;
-void error(char const *fmt, ...)
+/***** Helper function to dump a file stack to stderr *****/
+
+char const *dumpFileStack(struct FileStackNode const *node)
{
+ char const *lastName;
+
+ if (node->parent) {
+ lastName = dumpFileStack(node->parent);
+ /* REPT nodes use their parent's name */
+ if (node->type != NODE_REPT)
+ lastName = node->name;
+ fprintf(stderr, "(%" PRIu32 ") -> %s", node->lineNo, lastName);
+ if (node->type == NODE_REPT) {
+ for (uint32_t i = 0; i < node->reptDepth; i++)
+ fprintf(stderr, "::REPT~%" PRIu32, node->iters[i]);
+ }
+ } else {
+ assert(node->type != NODE_REPT);
+ lastName = node->name;
+ fputs(lastName, stderr);
+ }
+
+ return lastName;
+}
+
+void warning(struct FileStackNode const *where, uint32_t lineNo, char const *fmt, ...)
+{
va_list ap;
- fprintf(stderr, "error: ");
+ fputs("warning: ", stderr);
+ if (where) {
+ dumpFileStack(where);
+ fprintf(stderr, "(%" PRIu32 "): ", lineNo);
+ }
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
va_end(ap);
putc('\n', stderr);
+}
+void error(struct FileStackNode const *where, uint32_t lineNo, char const *fmt, ...)
+{
+ va_list ap;
+
+ fputs("error: ", stderr);
+ if (where) {
+ dumpFileStack(where);
+ fprintf(stderr, "(%" PRIu32 "): ", lineNo);
+ }
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ putc('\n', stderr);
+
if (nbErrors != UINT32_MAX)
nbErrors++;
}
-noreturn_ void fatal(char const *fmt, ...)
+noreturn_ void fatal(struct FileStackNode const *where, uint32_t lineNo, char const *fmt, ...)
{
va_list ap;
- fprintf(stderr, "fatal: ");
+ fputs("fatal: ", stderr);
+ if (where) {
+ dumpFileStack(where);
+ fprintf(stderr, "(%" PRIu32 "): ", lineNo);
+ }
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
va_end(ap);
@@ -177,11 +227,11 @@
case 'p':
value = strtoul(optarg, &endptr, 0);
if (optarg[0] == '\0' || *endptr != '\0') {
- error("Invalid argument for option 'p'");
+ error(NULL, 0, "Invalid argument for option 'p'");
value = 0xFF;
}
if (value > 0xFF) {
- error("Argument for 'p' must be a byte (between 0 and 0xFF)");
+ error(NULL, 0, "Argument for 'p' must be a byte (between 0 and 0xFF)");
value = 0xFF;
}
padValue = value;
@@ -189,7 +239,7 @@
case 's':
/* FIXME: nobody knows what this does, figure it out */
(void)optarg;
- warnx("Nobody has any idea what `-s` does");
+ warning(NULL, 0, "Nobody has any idea what `-s` does");
break;
case 't':
is32kMode = true;
@@ -234,8 +284,8 @@
bankranges[SECTTYPE_VRAM][1] = BANK_MIN_VRAM;
/* Read all object files first, */
- while (curArgIndex < argc)
- obj_ReadFile(argv[curArgIndex++]);
+ for (obj_Setup(argc - curArgIndex); curArgIndex < argc; curArgIndex++)
+ obj_ReadFile(argv[curArgIndex], argc - curArgIndex - 1);
/* then process them, */
obj_DoSanityChecks();
--- a/src/link/object.c
+++ b/src/link/object.c
@@ -31,6 +31,11 @@
struct SymbolList *next;
} *symbolLists;
+unsigned int nbObjFiles;
+static struct {
+ struct FileStackNode *nodes;
+ uint32_t nbNodes;
+} *nodes;
static struct Assertion *assertions;
/***** Helper functions for reading object files *****/
@@ -170,12 +175,56 @@
/***** Functions to parse object files *****/
/**
- * Reads a RGB6 symbol from a file.
+ * Reads a file stack node from a file.
* @param file The file to read from
+ * @param fileNodes The file's array of nodes
+ * @param i The ID of the node in the array
+ * @param fileName The filename to report in errors
+ */
+static void readFileStackNode(FILE *file, struct FileStackNode fileNodes[], uint32_t i,
+ char const *fileName)
+{
+ uint32_t parentID;
+
+ tryReadlong(parentID, file,
+ "%s: Cannot read node #%" PRIu32 "'s parent ID: %s", fileName, i);
+ fileNodes[i].parent = parentID == -1 ? NULL : &fileNodes[parentID];
+ tryReadlong(fileNodes[i].lineNo, file,
+ "%s: Cannot read node #%" PRIu32 "'s line number: %s", fileName, i);
+ tryGetc(fileNodes[i].type, file, "%s: Cannot read node #%" PRIu32 "'s type: %s",
+ fileName, i);
+ switch (fileNodes[i].type) {
+ case NODE_FILE:
+ case NODE_MACRO:
+ tryReadstr(fileNodes[i].name, file,
+ "%s: Cannot read node #%" PRIu32 "'s file name: %s", fileName, i);
+ break;
+
+ case NODE_REPT:
+ tryReadlong(fileNodes[i].reptDepth, file,
+ "%s: Cannot read node #%" PRIu32 "'s rept depth: %s", fileName, i);
+ fileNodes[i].iters = malloc(sizeof(*fileNodes[i].iters) * fileNodes[i].reptDepth);
+ if (!fileNodes[i].iters)
+ fatal(NULL, 0, "%s: Failed to alloc node #%" PRIu32 "'s iters: %s",
+ fileName, i, strerror(errno));
+ for (uint32_t k = 0; k < fileNodes[i].reptDepth; k++)
+ tryReadlong(fileNodes[i].iters[k], file,
+ "%s: Cannot read node #%" PRIu32 "'s iter #%" PRIu32 ": %s",
+ fileName, i, k);
+ if (!fileNodes[i].parent)
+ fatal(NULL, 0, "%s is not a valid object file: root node (#%"
+ PRIu32 ") may not be REPT", fileName, i);
+ }
+}
+
+/**
+ * Reads a symbol from a file.
+ * @param file The file to read from
* @param symbol The struct to fill
* @param fileName The filename to report in errors
*/
-static void readSymbol(FILE *file, struct Symbol *symbol, char const *fileName)
+static void readSymbol(FILE *file, struct Symbol *symbol,
+ char const *fileName, struct FileStackNode fileNodes[])
{
tryReadstr(symbol->name, file, "%s: Cannot read symbol name: %s",
fileName);
@@ -184,9 +233,12 @@
/* If the symbol is defined in this file, read its definition */
if (symbol->type != SYMTYPE_IMPORT) {
symbol->objFileName = fileName;
- tryReadstr(symbol->fileName, file,
- "%s: Cannot read \"%s\"'s file name: %s",
+ uint32_t nodeID;
+
+ tryReadlong(nodeID, file,
+ "%s: Cannot read \"%s\"'s node ID: %s",
fileName, symbol->name);
+ symbol->src = &fileNodes[nodeID];
tryReadlong(symbol->lineNo, file,
"%s: Cannot read \"%s\"'s line number: %s",
fileName, symbol->name);
@@ -202,7 +254,7 @@
}
/**
- * Reads a RGB6 patch from a file.
+ * Reads a patch from a file.
* @param file The file to read from
* @param patch The struct to fill
* @param fileName The filename to report in errors
@@ -210,11 +262,17 @@
*/
static void readPatch(FILE *file, struct Patch *patch, char const *fileName,
char const *sectName, uint32_t i,
- struct Section *fileSections[])
+ struct Section *fileSections[], struct FileStackNode fileNodes[])
{
- tryReadstr(patch->fileName, file,
- "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s name: %s",
+ uint32_t nodeID;
+
+ tryReadlong(nodeID, file,
+ "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s node ID: %s",
fileName, sectName, i);
+ patch->src = &fileNodes[nodeID];
+ tryReadlong(patch->lineNo, file,
+ "%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s line number: %s",
+ fileName, sectName, i);
tryReadlong(patch->offset, file,
"%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s offset: %s",
fileName, sectName, i);
@@ -221,9 +279,8 @@
tryReadlong(patch->pcSectionID, file,
"%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s PC offset: %s",
fileName, sectName, i);
- patch->pcSection = patch->pcSectionID == -1
- ? NULL
- : fileSections[patch->pcSectionID];
+ patch->pcSection = patch->pcSectionID == -1 ? NULL
+ : fileSections[patch->pcSectionID];
tryReadlong(patch->pcOffset, file,
"%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s PC offset: %s",
fileName, sectName, i);
@@ -234,9 +291,11 @@
"%s: Unable to read \"%s\"'s patch #%" PRIu32 "'s RPN size: %s",
fileName, sectName, i);
- uint8_t *rpnExpression =
- malloc(sizeof(*rpnExpression) * patch->rpnSize);
- size_t nbElementsRead = fread(rpnExpression, sizeof(*rpnExpression),
+ patch->rpnExpression = malloc(sizeof(*patch->rpnExpression) * patch->rpnSize);
+ if (!patch->rpnExpression)
+ err(1, "%s: Failed to alloc \"%s\"'s patch #%" PRIu32 "'s RPN expression",
+ fileName, sectName, i);
+ size_t nbElementsRead = fread(patch->rpnExpression, sizeof(*patch->rpnExpression),
patch->rpnSize, file);
if (nbElementsRead != patch->rpnSize)
@@ -243,7 +302,6 @@
errx(1, "%s: Cannot read \"%s\"'s patch #%" PRIu32 "'s RPN expression: %s",
fileName, sectName, i,
feof(file) ? "Unexpected end of file" : strerror(errno));
- patch->rpnExpression = rpnExpression;
}
/**
@@ -252,8 +310,8 @@
* @param section The struct to fill
* @param fileName The filename to report in errors
*/
-static void readSection(FILE *file, struct Section *section,
- char const *fileName, struct Section *fileSections[])
+static void readSection(FILE *file, struct Section *section, char const *fileName,
+ struct Section *fileSections[], struct FileStackNode fileNodes[])
{
int32_t tmp;
uint8_t byte;
@@ -280,7 +338,7 @@
fileName, section->name);
section->isAddressFixed = tmp >= 0;
if (tmp > UINT16_MAX) {
- error("\"%s\"'s org is too large (%" PRId32 ")",
+ error(NULL, 0, "\"%s\"'s org is too large (%" PRId32 ")",
section->name, tmp);
tmp = UINT16_MAX;
}
@@ -296,7 +354,7 @@
tryReadlong(tmp, file, "%s: Cannot read \"%s\"'s alignment offset: %s",
fileName, section->name);
if (tmp > UINT16_MAX) {
- error("\"%s\"'s alignment offset is too large (%" PRId32 ")",
+ error(NULL, 0, "\"%s\"'s alignment offset is too large (%" PRId32 ")",
section->name, tmp);
tmp = UINT16_MAX;
}
@@ -332,7 +390,7 @@
section->name);
for (uint32_t i = 0; i < section->nbPatches; i++) {
readPatch(file, &patches[i], fileName, section->name,
- i, fileSections);
+ i, fileSections, fileNodes);
}
section->patches = patches;
}
@@ -375,13 +433,13 @@
*/
static void readAssertion(FILE *file, struct Assertion *assert,
char const *fileName, uint32_t i,
- struct Section *fileSections[])
+ struct Section *fileSections[], struct FileStackNode fileNodes[])
{
char assertName[sizeof("Assertion #" EXPAND_AND_STR(UINT32_MAX))];
snprintf(assertName, sizeof(assertName), "Assertion #%" PRIu32, i);
- readPatch(file, &assert->patch, fileName, assertName, 0, fileSections);
+ readPatch(file, &assert->patch, fileName, assertName, 0, fileSections, fileNodes);
tryReadstr(assert->message, file, "%s: Cannot read assertion's message: %s",
fileName);
}
@@ -394,11 +452,7 @@
return section;
}
-/**
- * Reads an object file of any supported format
- * @param fileName The filename to report for errors
- */
-void obj_ReadFile(char const *fileName)
+void obj_ReadFile(char const *fileName, unsigned int fileID)
{
FILE *file = strcmp("-", fileName) ? fopen(fileName, "rb") : stdin;
@@ -438,6 +492,14 @@
nbSectionsToAssign += nbSections;
+ tryReadlong(nodes[fileID].nbNodes, file, "%s: Cannot read number of nodes: %s", fileName);
+ nodes[fileID].nodes = calloc(nodes[fileID].nbNodes, sizeof(nodes[fileID].nodes[0]));
+ if (!nodes[fileID].nodes)
+ err(1, "Failed to get memory for %s's nodes", fileName);
+ verbosePrint("Reading %u nodes...\n", nodes[fileID].nbNodes);
+ for (uint32_t i = 0; i < nodes[fileID].nbNodes; i++)
+ readFileStackNode(file, nodes[fileID].nodes, i, fileName);
+
/* This file's symbols, kept to link sections to them */
struct Symbol **fileSymbols =
malloc(sizeof(*fileSymbols) * nbSymbols + 1);
@@ -464,7 +526,7 @@
if (!symbol)
err(1, "%s: Couldn't create new symbol", fileName);
- readSymbol(file, symbol, fileName);
+ readSymbol(file, symbol, fileName, nodes[fileID].nodes);
fileSymbols[i] = symbol;
if (symbol->type == SYMTYPE_EXPORT)
@@ -485,7 +547,7 @@
err(1, "%s: Couldn't create new section", fileName);
fileSections[i]->nextu = NULL;
- readSection(file, fileSections[i], fileName, fileSections);
+ readSection(file, fileSections[i], fileName, fileSections, nodes[fileID].nodes);
fileSections[i]->fileSymbols = fileSymbols;
if (nbSymPerSect[i]) {
fileSections[i]->symbols = malloc(nbSymPerSect[i]
@@ -535,7 +597,7 @@
if (!assertion)
err(1, "%s: Couldn't create new assertion", fileName);
- readAssertion(file, assertion, fileName, i, fileSections);
+ readAssertion(file, assertion, fileName, i, fileSections, nodes[fileID].nodes);
assertion->fileSymbols = fileSymbols;
assertion->next = assertions;
assertions = assertion;
@@ -555,6 +617,15 @@
patch_CheckAssertions(assertions);
}
+void obj_Setup(unsigned int nbFiles)
+{
+ nbObjFiles = nbFiles;
+
+ if (nbFiles > SIZE_MAX / sizeof(*nodes))
+ fatal(NULL, 0, "Impossible to link more than %zu files!", SIZE_MAX / sizeof(*nodes));
+ nodes = malloc(sizeof(*nodes) * nbFiles);
+}
+
static void freeSection(struct Section *section, void *arg)
{
(void)arg;
@@ -562,12 +633,8 @@
free(section->name);
if (sect_HasData(section->type)) {
free(section->data);
- for (int32_t i = 0; i < section->nbPatches; i++) {
- struct Patch *patch = &section->patches[i];
-
- free(patch->fileName);
- free(patch->rpnExpression);
- }
+ for (int32_t i = 0; i < section->nbPatches; i++)
+ free(section->patches[i].rpnExpression);
free(section->patches);
}
free(section->symbols);
@@ -577,13 +644,20 @@
static void freeSymbol(struct Symbol *symbol)
{
free(symbol->name);
- if (symbol->type != SYMTYPE_IMPORT)
- free(symbol->fileName);
free(symbol);
}
void obj_Cleanup(void)
{
+ for (unsigned int i = 0; i < nbObjFiles; i++) {
+ for (uint32_t j = 0; j < nodes[i].nbNodes; j++) {
+ if (nodes[i].nodes[j].type == NODE_REPT)
+ free(nodes[i].nodes[j].iters);
+ }
+ free(nodes[i].nodes);
+ }
+ free(nodes);
+
sym_CleanupSymbols();
sect_ForEach(freeSection, NULL);
--- a/src/link/patch.c
+++ b/src/link/patch.c
@@ -6,11 +6,13 @@
* SPDX-License-Identifier: MIT
*/
+#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
+#include "link/object.h"
#include "link/patch.h"
#include "link/section.h"
#include "link/symbol.h"
@@ -104,10 +106,10 @@
stack.size++;
}
-static int32_t popRPN(char const *fileName)
+static int32_t popRPN(struct FileStackNode const *node, uint32_t lineNo)
{
if (stack.size == 0)
- errx(1, "%s: Internal error, RPN stack empty", fileName);
+ fatal(node, lineNo, "Internal error, RPN stack empty");
stack.size--;
return stack.buf[stack.size];
@@ -121,10 +123,11 @@
/* RPN operators */
static uint32_t getRPNByte(uint8_t const **expression, int32_t *size,
- char const *fileName)
+ struct FileStackNode const *node, uint32_t lineNo)
{
if (!(*size)--)
- errx(1, "%s: RPN expression overread", fileName);
+ fatal(node, lineNo, "Internal error, RPN expression overread");
+
return *(*expression)++;
}
@@ -131,6 +134,7 @@
static struct Symbol const *getSymbol(struct Symbol const * const *symbolList,
uint32_t index)
{
+ assert(index != -1); /* PC needs to be handled specially, not here */
struct Symbol const *symbol = symbolList[index];
/* If the symbol is defined elsewhere... */
@@ -150,7 +154,7 @@
struct Symbol const * const *fileSymbols)
{
/* Small shortcut to avoid a lot of repetition */
-#define popRPN() popRPN(patch->fileName)
+#define popRPN() popRPN(patch->src, patch->lineNo)
uint8_t const *expression = patch->rpnExpression;
int32_t size = patch->rpnSize;
@@ -159,7 +163,7 @@
while (size > 0) {
enum RPNCommand command = getRPNByte(&expression, &size,
- patch->fileName);
+ patch->src, patch->lineNo);
int32_t value;
/*
@@ -187,7 +191,7 @@
case RPN_DIV:
value = popRPN();
if (value == 0) {
- error("%s: Division by 0", patch->fileName);
+ error(patch->src, patch->lineNo, "Division by 0");
popRPN();
value = INT32_MAX;
} else {
@@ -197,7 +201,7 @@
case RPN_MOD:
value = popRPN();
if (value == 0) {
- error("%s: Modulo by 0", patch->fileName);
+ error(patch->src, patch->lineNo, "Modulo by 0");
popRPN();
value = 0;
} else {
@@ -269,17 +273,17 @@
value = 0;
for (uint8_t shift = 0; shift < 32; shift += 8)
value |= getRPNByte(&expression, &size,
- patch->fileName) << shift;
+ patch->src, patch->lineNo) << shift;
symbol = getSymbol(fileSymbols, value);
if (!symbol) {
- error("%s: Requested BANK() of symbol \"%s\", which was not found",
- patch->fileName,
+ error(patch->src, patch->lineNo,
+ "Requested BANK() of symbol \"%s\", which was not found",
fileSymbols[value]->name);
value = 1;
} else if (!symbol->section) {
- error("%s: Requested BANK() of non-label symbol \"%s\"",
- patch->fileName,
+ error(patch->src, patch->lineNo,
+ "Requested BANK() of non-label symbol \"%s\"",
fileSymbols[value]->name);
value = 1;
} else {
@@ -289,14 +293,15 @@
case RPN_BANK_SECT:
name = (char const *)expression;
- while (getRPNByte(&expression, &size, patch->fileName))
+ while (getRPNByte(&expression, &size, patch->src, patch->lineNo))
;
sect = sect_GetSection(name);
if (!sect) {
- error("%s: Requested BANK() of section \"%s\", which was not found",
- patch->fileName, name);
+ error(patch->src, patch->lineNo,
+ "Requested BANK() of section \"%s\", which was not found",
+ name);
value = 1;
} else {
value = sect->bank;
@@ -305,7 +310,8 @@
case RPN_BANK_SELF:
if (!patch->pcSection) {
- error("%s: PC has no bank outside a section");
+ error(patch->src, patch->lineNo,
+ "PC has no bank outside a section");
value = 1;
} else {
value = patch->pcSection->bank;
@@ -317,8 +323,8 @@
if (value < 0
|| (value > 0xFF && value < 0xFF00)
|| value > 0xFFFF)
- error("%s: Value %" PRId32 " is not in HRAM range",
- patch->fileName, value);
+ error(patch->src, patch->lineNo,
+ "Value %" PRId32 " is not in HRAM range", value);
value &= 0xFF;
break;
@@ -328,8 +334,8 @@
* They can be easily checked with a bitmask
*/
if (value & ~0x38)
- error("%s: Value %" PRId32 " is not a RST vector",
- patch->fileName, value);
+ error(patch->src, patch->lineNo,
+ "Value %" PRId32 " is not a RST vector", value);
value |= 0xC7;
break;
@@ -337,7 +343,7 @@
value = 0;
for (uint8_t shift = 0; shift < 32; shift += 8)
value |= getRPNByte(&expression, &size,
- patch->fileName) << shift;
+ patch->src, patch->lineNo) << shift;
break;
case RPN_SYM:
@@ -344,25 +350,28 @@
value = 0;
for (uint8_t shift = 0; shift < 32; shift += 8)
value |= getRPNByte(&expression, &size,
- patch->fileName) << shift;
+ patch->src, patch->lineNo) << shift;
- symbol = getSymbol(fileSymbols, value);
-
- if (!symbol) {
- error("%s: Unknown symbol \"%s\"",
- patch->fileName,
- fileSymbols[value]->name);
- } else if (strcmp(symbol->name, "@")) {
- value = symbol->value;
- /* Symbols attached to sections have offsets */
- if (symbol->section)
- value += symbol->section->org;
- } else if (!patch->pcSection) {
- error("%s: PC has no value outside a section",
- patch->fileName);
- value = 0;
+ if (value == -1) { /* PC */
+ if (!patch->pcSection) {
+ error(patch->src, patch->lineNo,
+ "PC has no value outside a section");
+ value = 0;
+ } else {
+ value = patch->pcOffset + patch->pcSection->org;
+ }
} else {
- value = patch->pcOffset + patch->pcSection->org;
+ symbol = getSymbol(fileSymbols, value);
+
+ if (!symbol) {
+ error(patch->src, patch->lineNo,
+ "Unknown symbol \"%s\"", fileSymbols[value]->name);
+ } else {
+ value = symbol->value;
+ /* Symbols attached to sections have offsets */
+ if (symbol->section)
+ value += symbol->section->org;
+ }
}
break;
}
@@ -371,8 +380,8 @@
}
if (stack.size > 1)
- error("%s: RPN stack has %zu entries on exit, not 1",
- patch->fileName, stack.size);
+ error(patch->src, patch->lineNo,
+ "RPN stack has %zu entries on exit, not 1", stack.size);
return popRPN();
@@ -390,20 +399,20 @@
assert->fileSymbols)) {
switch ((enum AssertionType)assert->patch.type) {
case ASSERT_FATAL:
- fatal("%s: %s", assert->patch.fileName,
+ fatal(assert->patch.src, assert->patch.lineNo, "%s",
assert->message[0] ? assert->message
: "assert failure");
/* Not reached */
break; /* Here so checkpatch doesn't complain */
case ASSERT_ERROR:
- error("%s: %s", assert->patch.fileName,
+ error(assert->patch.src, assert->patch.lineNo, "%s",
assert->message[0] ? assert->message
: "assert failure");
break;
case ASSERT_WARN:
- warnx("%s: %s", assert->patch.fileName,
- assert->message[0] ? assert->message
- : "assert failure");
+ warning(assert->patch.src, assert->patch.lineNo, "%s",
+ assert->message[0] ? assert->message
+ : "assert failure");
break;
}
}
@@ -442,8 +451,9 @@
int16_t jumpOffset = value - address;
if (jumpOffset < -128 || jumpOffset > 127)
- error("%s: jr target out of reach (expected -129 < %" PRId16 " < 128)",
- patch->fileName, jumpOffset);
+ error(patch->src, patch->lineNo,
+ "jr target out of reach (expected -129 < %" PRId16 " < 128)",
+ jumpOffset);
dataSection->data[offset] = jumpOffset & 0xFF;
} else {
/* Patch a certain number of bytes */
@@ -459,9 +469,9 @@
if (value < types[patch->type].min
|| value > types[patch->type].max)
- error("%s: Value %#" PRIx32 "%s is not %u-bit",
- patch->fileName, value,
- value < 0 ? " (maybe negative?)" : "",
+ error(patch->src, patch->lineNo,
+ "Value %#" PRIx32 "%s is not %u-bit",
+ value, value < 0 ? " (maybe negative?)" : "",
types[patch->type].size * 8U);
for (uint8_t i = 0; i < types[patch->type].size; i++) {
dataSection->data[offset + i] = value & 0xFF;
--- a/src/link/symbol.c
+++ b/src/link/symbol.c
@@ -8,9 +8,12 @@
#include <inttypes.h>
#include <stdbool.h>
+#include <stdlib.h>
+#include "link/object.h"
#include "link/symbol.h"
#include "link/main.h"
+
#include "extern/err.h"
#include "hashmap.h"
@@ -40,11 +43,15 @@
/* Check if the symbol already exists */
struct Symbol *other = hash_GetElement(symbols, symbol->name);
- if (other)
- errx(1, "\"%s\" both in %s from %s(%" PRId32 ") and in %s from %s(%" PRId32 ")",
- symbol->name,
- symbol->objFileName, symbol->fileName, symbol->lineNo,
- other->objFileName, other->fileName, other->lineNo);
+ if (other) {
+ fprintf(stderr, "error: \"%s\" both in %s from ", symbol->name, symbol->objFileName);
+ dumpFileStack(symbol->src);
+ fprintf(stderr, "(%" PRIu32 ") and in %s from ",
+ symbol->lineNo, other->objFileName);
+ dumpFileStack(other->src);
+ fprintf(stderr, "(%" PRIu32 ")\n", other->lineNo);
+ exit(1);
+ }
/* If not, add it */
bool collided = hash_AddElement(symbols, symbol->name, symbol);
--- a/src/rgbds.5
+++ b/src/rgbds.5
@@ -16,7 +16,7 @@
.Xr rgbasm 1
and
.Xr rgblink 1 .
-.Em Please note that the specifications may change.
+.Em Please note that the specifications may change .
This toolchain is in development and new features may require adding more information to the current format, or modifying some fields, which would break compatibility with older versions.
.Pp
.Sh FILE STRUCTURE
@@ -34,34 +34,67 @@
; Header
BYTE ID[4] ; "RGB9"
-LONG RevisionNumber ; The format's revision number this file uses
-LONG NumberOfSymbols ; The number of symbols used in this file
-LONG NumberOfSections ; The number of sections used in this file
+LONG RevisionNumber ; The format's revision number this file uses.
+LONG NumberOfSymbols ; The number of symbols used in this file.
+LONG NumberOfSections ; The number of sections used in this file.
+; File info
+
+LONG NumberOfNodes ; The number of nodes contained in this file.
+
+REPT NumberOfNodes ; IMPORTANT NOTE: the nodes are actually written in
+ ; **reverse** order, meaning the node with ID 0 is
+ ; the last one in the file!
+
+ LONG ParentID ; ID of the parent node, -1 means this is the root.
+
+ LONG ParentLineNo ; Line at which the parent context was exited.
+ ; Meaningless on the root node.
+
+ BYTE Type ; 0 = REPT node
+ ; 1 = File node
+ ; 2 = Macro node
+
+ IF Type != 0 ; If the node is not a REPT...
+
+ STRING Name ; The node's name: either a file name, or macro name
+ ; prefixed by its definition file name.
+
+ ELSE ; If the node is a REPT, it also contains the iter
+ ; counts of all the parent REPTs.
+
+ LONG Depth ; Size of the array below.
+
+ LONG Iter[Depth] ; The number of REPT iterations by increasing depth.
+
+ ENDC
+
+ENDR
+
; Symbols
-REPT NumberOfSymbols ; Number of symbols defined in this object file.
+REPT NumberOfSymbols ; Number of symbols defined in this object file.
- STRING Name ; The name of this symbol. Local symbols are stored
- ; as "Scope.Symbol".
+ STRING Name ; The name of this symbol. Local symbols are stored
+ ; as "Scope.Symbol".
- BYTE Type ; 0 = LOCAL symbol only used in this file.
- ; 1 = IMPORT this symbol from elsewhere
- ; 2 = EXPORT this symbol to other objects.
+ BYTE Type ; 0 = LOCAL symbol only used in this file.
+ ; 1 = IMPORT this symbol from elsewhere
+ ; 2 = EXPORT this symbol to other objects.
- IF (Type & 0x7F) != 1 ; If symbol is defined in this object file.
+ IF (Type & 0x7F) != 1 ; If symbol is defined in this object file.
- STRING FileName ; File where the symbol is defined.
+ LONG SourceFile ; File where the symbol is defined.
- LONG LineNum ; Line number in the file where the symbol is defined.
+ LONG LineNum ; Line number in the file where the symbol is defined.
- LONG SectionID ; The section number (of this object file) in which
- ; this symbol is defined. If it doesn't belong to any
- ; specific section (like a constant), this field has
- ; the value -1.
+ LONG SectionID ; The section number (of this object file) in which
+ ; this symbol is defined. If it doesn't belong to any
+ ; specific section (like a constant), this field has
+ ; the value -1.
- LONG Value ; The symbols value. It's the offset into that
- ; symbol's section.
+ LONG Value ; The symbol's value. It's the offset into that
+ ; symbol's section.
ENDC
@@ -107,9 +140,11 @@
REPT NumberOfPatches
- STRING SourceFile ; Name of the source file (for printing error
- ; messages).
+ LONG SourceFile ; ID of the source file node (for printing
+ ; error messages).
+ LONG LineNo ; Line at which the patch was created.
+
LONG Offset ; Offset into the section where patch should
; be applied (in bytes).
@@ -145,8 +180,10 @@
REPT NumberOfAssertions
- STRING SourceFile ; Name of the source file (for printing the failure).
+ LONG SourceFile ; ID of the source file node (for printing the failure).
+ LONG LineNo ; Line at which the assertion was created.
+
LONG Offset ; Offset into the section where the assertion is located.
LONG SectionID ; Index within the file of the section in which PC is
@@ -209,7 +246,7 @@
.It Li $50 Ta Li BANK(symbol) ,
a
.Ar LONG
-Symbol ID follows.
+Symbol ID follows, where -1 means PC.
.It Li $51 Ta Li BANK(section_name) ,
a null-terminated string follows.
.It Li $52 Ta Li Current BANK()
--- a/test/asm/assert.err
+++ b/test/asm/assert.err
@@ -6,5 +6,5 @@
Expected constant expression: 'FloatingBase' is not constant at assembly time
ERROR: assert.asm(18):
Assertion failed
-ERROR: assert.asm(21):
+FATAL: assert.asm(21):
Assertion failed
--- a/test/asm/divzero-instr.err
+++ b/test/asm/divzero-instr.err
@@ -1,2 +1,2 @@
-ERROR: divzero-instr.asm(2):
+FATAL: divzero-instr.asm(2):
Division by zero
--- a/test/asm/divzero-section-bank.err
+++ b/test/asm/divzero-section-bank.err
@@ -1,2 +1,2 @@
-ERROR: divzero-section-bank.asm(1):
+FATAL: divzero-section-bank.asm(1):
Division by zero
--- /dev/null
+++ b/test/asm/equs-nest.asm
@@ -1,0 +1,4 @@
+X1 equs "Y1 equs \"\\\"Success!\\\\n\\\"\""
+Y1 equs "Z1"
+X1
+ PRINTT Z1
--- /dev/null
+++ b/test/asm/equs-nest.out
@@ -1,0 +1,1 @@
+Success!
--- /dev/null
+++ b/test/asm/equs-newline.asm
@@ -1,0 +1,4 @@
+
+ACT equs "WARN \"First\"\nWARN \"Second\""
+ ACT
+ WARN "Third"
--- /dev/null
+++ b/test/asm/equs-newline.err
@@ -1,0 +1,7 @@
+warning: equs-newline.asm(3): [-Wuser]
+ First
+while expanding symbol "ACT"
+warning: equs-newline.asm(3): [-Wuser]
+ Second
+warning: equs-newline.asm(4): [-Wuser]
+ Third
--- /dev/null
+++ b/test/asm/equs-purge.asm
@@ -1,0 +1,2 @@
+BYE equs "PURGE BYE\nWARN \"Crash?\"\n \n"
+BYE
--- /dev/null
+++ b/test/asm/equs-purge.err
@@ -1,0 +1,3 @@
+warning: equs-purge.asm(2): [-Wuser]
+ Crash?
+while expanding symbol "BYE"
--- a/test/asm/equs-recursion.asm
+++ b/test/asm/equs-recursion.asm
@@ -1,2 +1,6 @@
-recurse EQUS "recurse"
-recurse
\ No newline at end of file
+recurse EQUS "recurse "
+recurse
+
+; FIXME: also handle the following:
+; recurse EQUS "recurse"
+; recurse
--- a/test/asm/equs-recursion.err
+++ b/test/asm/equs-recursion.err
@@ -1,5 +1,6 @@
-ERROR: equs-recursion.asm(2):
+FATAL: equs-recursion.asm(2):
Recursion limit (64) exceeded
+while expanding symbol "recurse"
while expanding symbol "recurse"
while expanding symbol "recurse"
while expanding symbol "recurse"
--- /dev/null
+++ b/test/asm/file-sym.asm
@@ -1,0 +1,1 @@
+PRINTT "{__FILE__}\n"
--- /dev/null
+++ b/test/asm/file-sym.out
@@ -1,0 +1,1 @@
+"file-sym.asm"
--- a/test/asm/garbage_char.asm
+++ b/test/asm/garbage_char.asm
@@ -1,1 +1,1 @@
-x
\ No newline at end of file
+--- a/test/asm/garbage_char.err
+++ b/test/asm/garbage_char.err
@@ -1,2 +1,3 @@
ERROR: garbage_char.asm(1):
- Found garbage character: 0xFF
+ Unknown character 0xFF
+error: Assembly aborted (1 errors)!
--- /dev/null
+++ b/test/asm/if-macro.asm
@@ -1,0 +1,13 @@
+m: macro
+ if 0
+ WARN "3"
+ else
+ WARN "5"
+ endc
+endm
+
+if 1
+ m
+else
+ WARN "12"
+endc
--- /dev/null
+++ b/test/asm/if-macro.err
@@ -1,0 +1,2 @@
+warning: if-macro.asm(10) -> if-macro.asm::m(5): [-Wuser]
+ 5
--- a/test/asm/[email protected]
+++ b/test/asm/[email protected]
@@ -1,2 +1,2 @@
-if {@}
+if "{@}"
endc
--- a/test/asm/include-recursion.err
+++ b/test/asm/include-recursion.err
@@ -1,2 +1,2 @@
-ERROR: include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1):
+FATAL: include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1) -> include-recursion.asm(1):
Recursion limit (64) exceeded
--- a/test/asm/label-redefinition.err
+++ b/test/asm/label-redefinition.err
@@ -1,3 +1,3 @@
ERROR: label-redefinition.asm(7):
- 'Sym' already defined in label-redefinition.asm::m(6)
+ 'Sym' already defined at label-redefinition.asm(6) -> label-redefinition.asm::m(4)
error: Assembly aborted (1 errors)!
--- a/test/asm/line-continuation-macro.asm
+++ b/test/asm/line-continuation-macro.asm
@@ -2,6 +2,7 @@
ENDM
m2: MACRO
- m \ ENDM
+ m \
+ENDM
m2
--- a/test/asm/line-continuation-rept.asm
+++ b/test/asm/line-continuation-rept.asm
@@ -2,7 +2,9 @@
ENDM
REPT 1
- m ENDR
+ m
+ENDR
REPT 1
- m \ ENDR
+ m \
+ENDR
--- a/test/asm/line-continuation-whitespace.asm
+++ b/test/asm/line-continuation-whitespace.asm
@@ -4,4 +4,4 @@
bar: MACRO
ENDM
-foo: bar baz\
+foo: bar baz\
--- a/test/asm/load-overflow.err
+++ b/test/asm/load-overflow.err
@@ -1,2 +1,2 @@
-ERROR: load-overflow.asm(4):
+FATAL: load-overflow.asm(4):
Section 'Overflow' grew too big (max size = 0x8000 bytes, reached 0x8001).
--- a/test/asm/local-purge.err
+++ b/test/asm/local-purge.err
@@ -1,3 +1,3 @@
ERROR: local-purge.asm(8):
- '.loc' not defined
+ Interpolated symbol ".loc" does not exist
error: Assembly aborted (1 errors)!
--- a/test/asm/local-purge.out
+++ b/test/asm/local-purge.out
@@ -1,1 +1,1 @@
-$0
+
--- a/test/asm/local-ref-without-parent.err
+++ b/test/asm/local-ref-without-parent.err
@@ -1,2 +1,2 @@
-ERROR: local-ref-without-parent.asm(3):
+FATAL: local-ref-without-parent.asm(3):
Local label reference '.test' in main scope
--- /dev/null
+++ b/test/asm/macro-line-no.asm
@@ -1,0 +1,8 @@
+
+WARN "Line 2"
+m: macro
+ WARN "Line 4"
+endm
+WARN "Line 6"
+ m
+WARN "Line 8"
--- /dev/null
+++ b/test/asm/macro-line-no.err
@@ -1,0 +1,8 @@
+warning: macro-line-no.asm(2): [-Wuser]
+ Line 2
+warning: macro-line-no.asm(6): [-Wuser]
+ Line 6
+warning: macro-line-no.asm(7) -> macro-line-no.asm::m(4): [-Wuser]
+ Line 4
+warning: macro-line-no.asm(8): [-Wuser]
+ Line 8
--- a/test/asm/macro-recursion.err
+++ b/test/asm/macro-recursion.err
@@ -1,2 +1,2 @@
-ERROR: macro-recursion.asm(4) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> 
macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2):
+FATAL: macro-recursion.asm(4) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> 
macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2) -> macro-recursion.asm::recurse(2):
Recursion limit (64) exceeded
--- /dev/null
+++ b/test/asm/narg-nosect.asm
@@ -1,0 +1,1 @@
+ PRINTT "{_NARG}\n"
--- /dev/null
+++ b/test/asm/narg-nosect.err
@@ -1,0 +1,3 @@
+ERROR: narg-nosect.asm(1):
+ _NARG does not make sense outside of a macro
+error: Assembly aborted (1 errors)!
--- /dev/null
+++ b/test/asm/narg-nosect.out
@@ -1,0 +1,1 @@
+$0
--- a/test/asm/nested-brackets.asm
+++ b/test/asm/nested-brackets.asm
@@ -3,3 +3,4 @@
PRINTT "{S{WRAPPER}G}\n"
PRINTT "{S{WRAPPER}G"
+ PRINTT "\n"
--- a/test/asm/nested-brackets.err
+++ b/test/asm/nested-brackets.err
@@ -1,2 +1,3 @@
ERROR: nested-brackets.asm(5):
Missing }
+error: Assembly aborted (1 errors)!
--- a/test/asm/nested-brackets.out
+++ b/test/asm/nested-brackets.out
@@ -1,1 +1,2 @@
OK
+OK
binary files a/test/asm/null-in-macro.asm b/test/asm/null-in-macro.asm differ
--- a/test/asm/null-in-macro.err
+++ b/test/asm/null-in-macro.err
@@ -1,2 +1,3 @@
-ERROR: null-in-macro.asm(2):
- Found null character
+ERROR: null-in-macro.asm(4) -> null-in-macro.asm::foo(2):
+ Unknown character 0x00
+error: Assembly aborted (1 errors)!
--- /dev/null
+++ b/test/asm/opt-b.asm
@@ -1,0 +1,3 @@
+OPT b.X
+PRINTV %..X.X.X.
+PRINTT "\n"
--- /dev/null
+++ b/test/asm/opt-b.out
@@ -1,0 +1,1 @@
+$2A
--- /dev/null
+++ b/test/asm/opt-g.asm
@@ -1,0 +1,3 @@
+OPT g.x0X
+PRINTV `.x.x0X0X
+PRINTT "\n"
--- /dev/null
+++ b/test/asm/opt-g.out
@@ -1,0 +1,1 @@
+$F55
--- a/test/asm/overflow.err
+++ b/test/asm/overflow.err
@@ -3,6 +3,6 @@
warning: overflow.asm(25): [-Wdiv]
Division of -2147483648 by -1 yields -2147483648
warning: overflow.asm(39): [-Wlarge-constant]
- Integer constant '4294967296' is too large
+ Integer constant is too large
warning: overflow.asm(42): [-Wlarge-constant]
- Graphics constant '`333333333' is too long
+ Graphics constant is too long, only 8 first pixels considered
--- a/test/asm/pops-no-pushed-sections.err
+++ b/test/asm/pops-no-pushed-sections.err
@@ -1,2 +1,2 @@
-ERROR: pops-no-pushed-sections.asm(1):
+FATAL: pops-no-pushed-sections.asm(1):
No entries in the section stack
--- a/test/asm/pops-restore-no-section.err
+++ b/test/asm/pops-restore-no-section.err
@@ -1,4 +1,4 @@
ERROR: pops-restore-no-section.asm(9):
Label "DisallowedContent" created outside of a SECTION
-ERROR: pops-restore-no-section.asm(10):
+FATAL: pops-restore-no-section.asm(10):
Code generation before SECTION directive
--- a/test/asm/remote-local-noexist.err
+++ b/test/asm/remote-local-noexist.err
@@ -1,2 +1,2 @@
-ERROR: remote-local-noexist.asm(7):
+FATAL: remote-local-noexist.asm(7):
'Parent.child.fail' is a nonsensical reference to a nested local symbol
--- /dev/null
+++ b/test/asm/rept-0.asm
@@ -1,0 +1,3 @@
+REPT 0
+ WARN "2"
+ENDR
--- a/test/asm/rept-shift.err
+++ b/test/asm/rept-shift.err
@@ -1,2 +1,2 @@
-ERROR: rept-shift.asm(17) -> rept-shift.asm::m(14):
+FATAL: rept-shift.asm(17) -> rept-shift.asm::m(14):
Macro argument '\1' not defined
--- a/test/asm/section-union.err
+++ b/test/asm/section-union.err
@@ -6,5 +6,5 @@
Section "test" already declared as fixed at $c000
ERROR: section-union.asm(37):
Section "test" already declared as aligned to 256 bytes
-ERROR: section-union.asm(37):
+FATAL: section-union.asm(37):
Cannot create section "test" (3 errors)
--- a/test/asm/sym-collision.err
+++ b/test/asm/sym-collision.err
@@ -1,3 +1,3 @@
ERROR: sym-collision.asm(26):
- 'dork' not defined
+ Interpolated symbol "dork" does not exist
error: Assembly aborted (1 errors)!
--- a/test/asm/sym-collision.out
+++ b/test/asm/sym-collision.out
@@ -1,7 +1,7 @@
aqfj: $FE00
cxje: $FE01
dgsd: $FE02
-dork: $0
+dork:
lxok: $FE04
psgp: $FE05
sfly: $FE06
--- a/test/asm/symbol-invalid-macro-arg.err
+++ b/test/asm/symbol-invalid-macro-arg.err
@@ -1,2 +1,2 @@
-ERROR: symbol-invalid-macro-arg.asm(1):
- Invalid macro argument '\0' in symbol
+FATAL: symbol-invalid-macro-arg.asm(1):
+ Invalid macro argument '\0'
--- a/test/asm/test.sh
+++ b/test/asm/test.sh
@@ -12,6 +12,7 @@
bold=$(tput bold)
resbold=$(tput sgr0)
red=$(tput setaf 1)
+green=$(tput setaf 2)
rescolors=$(tput op)
tryDiff () {
diff -u --strip-trailing-cr $1 $2 || (echo "${bold}${red}${i%.asm}${variant}.$3 mismatch!${rescolors}${resbold}"; false)
@@ -36,6 +37,7 @@
for i in *.asm; do
for variant in '' '.pipe'; do
+ echo -e "${bold}${green}${i%.asm}${variant}...${rescolors}${resbold}"
if [ -z "$variant" ]; then
../../rgbasm -Weverything -o $o $i > $output 2> $errput
desired_output=${i%.asm}.out
@@ -59,8 +61,8 @@
# Escape regex metacharacters
subst="$(printf '%s\n' "$i" | sed 's:[][\/.^$*]:\\&:g')"
# Replace the file name with a dash to match changed output
- sed "s/$subst/-/g" ${i%.asm}.out > $desired_output
- sed "s/$subst/-/g" ${i%.asm}.err > $desired_errput
+ sed "s/$subst/<stdin>/g" ${i%.asm}.out > $desired_output
+ sed "s/$subst/<stdin>/g" ${i%.asm}.err > $desired_errput
fi
tryDiff $desired_output $output out
--- a/test/asm/unique-id.err
+++ b/test/asm/unique-id.err
@@ -1,19 +1,19 @@
warning: unique-id.asm(12) -> unique-id.asm::m(4): [-Wuser]
- _0
-warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser]
_1
-warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser]
+warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser]
_2
+warning: unique-id.asm(12) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser]
+ _3
warning: unique-id.asm(12) -> unique-id.asm::m(8): [-Wuser]
- _0
+ _1
warning: unique-id.asm(14) -> unique-id.asm::m(4): [-Wuser]
- _3
-warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser]
_4
-warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser]
+warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~1(6): [-Wuser]
_5
+warning: unique-id.asm(14) -> unique-id.asm::m(5) -> unique-id.asm::m::REPT~2(6): [-Wuser]
+ _6
warning: unique-id.asm(14) -> unique-id.asm::m(8): [-Wuser]
- _3
-ERROR: unique-id.asm(15):
+ _4
+FATAL: unique-id.asm(15):
Macro argument '\@' not defined
while expanding symbol "print"
--- a/test/link/section-union/align-conflict.out
+++ b/test/link/section-union/align-conflict.out
@@ -1,6 +1,6 @@
error: Section "conflicting alignment" is defined with conflicting 4-byte alignment (offset 0) and address $cafe
---
-ERROR: -(18):
+ERROR: <stdin>(18):
Section "conflicting alignment" already declared as aligned to 4 bytes (offset 0)
-ERROR: -(18):
+FATAL: <stdin>(18):
Cannot create section "conflicting alignment" (1 errors)
--- a/test/link/section-union/align-ofs-conflict.out
+++ b/test/link/section-union/align-ofs-conflict.out
@@ -1,6 +1,6 @@
error: Section "conflicting alignment" is defined with conflicting 8-byte alignment (offset 7) and 16-byte alignment (offset 14)
---
-ERROR: -(18):
+ERROR: <stdin>(18):
Section "conflicting alignment" already declared with incompatible 3-byte alignment (offset 7)
-ERROR: -(18):
+FATAL: <stdin>(18):
Cannot create section "conflicting alignment" (1 errors)
--- a/test/link/section-union/assert.out
+++ b/test/link/section-union/assert.out
@@ -1,6 +1,6 @@
error: section-union/assert.asm(11): Force failing the build
Linking failed with 1 error
---
-ERROR: -(30):
+ERROR: <stdin>(30):
Assertion failed: Force failing the build
error: Assembly aborted (1 errors)!
--- a/test/link/section-union/bad-types.out
+++ b/test/link/section-union/bad-types.out
@@ -1,6 +1,6 @@
error: Section "conflicting types" is defined with conflicting types HRAM and WRAM0
---
-ERROR: -(18):
+ERROR: <stdin>(18):
Section "conflicting types" already exists but with type HRAM
-ERROR: -(18):
+FATAL: <stdin>(18):
Cannot create section "conflicting types" (1 errors)
--- a/test/link/section-union/bank-conflict.out
+++ b/test/link/section-union/bank-conflict.out
@@ -1,6 +1,6 @@
error: Section "conflicting banks" is defined with conflicting banks 4 and 1
---
-ERROR: -(14):
+ERROR: <stdin>(14):
Section "conflicting banks" already declared with different bank 4
-ERROR: -(14):
+FATAL: <stdin>(14):
Cannot create section "conflicting banks" (1 errors)
--- a/test/link/section-union/data-overlay.out
+++ b/test/link/section-union/data-overlay.out
@@ -1,6 +1,6 @@
error: Section "overlaid data" is of type ROM0, which cannot be unionized
---
-ERROR: -(18):
+ERROR: <stdin>(18):
Cannot declare ROM sections as UNION
-ERROR: -(18):
+FATAL: <stdin>(18):
Cannot create section "overlaid data" (1 errors)
--- a/test/link/section-union/different-data.out
+++ b/test/link/section-union/different-data.out
@@ -1,6 +1,6 @@
error: Section "different data" is of type ROM0, which cannot be unionized
---
-ERROR: -(16):
+ERROR: <stdin>(16):
Cannot declare ROM sections as UNION
-ERROR: -(16):
+FATAL: <stdin>(16):
Cannot create section "different data" (1 errors)
--- a/test/link/section-union/different-ofs.out
+++ b/test/link/section-union/different-ofs.out
@@ -1,6 +1,6 @@
error: Section "conflicting alignment" is defined with conflicting 8-byte alignment (offset 7) and 8-byte alignment (offset 6)
---
-ERROR: -(18):
+ERROR: <stdin>(18):
Section "conflicting alignment" already declared with incompatible 3-byte alignment (offset 7)
-ERROR: -(18):
+FATAL: <stdin>(18):
Cannot create section "conflicting alignment" (1 errors)
--- a/test/link/section-union/different-size.out
+++ b/test/link/section-union/different-size.out
@@ -1,6 +1,6 @@
error: Section "different section sizes" is of type ROM0, which cannot be unionized
---
-ERROR: -(16):
+ERROR: <stdin>(16):
Cannot declare ROM sections as UNION
-ERROR: -(16):
+FATAL: <stdin>(16):
Cannot create section "different section sizes" (1 errors)
--- a/test/link/section-union/different-syntaxes.out
+++ b/test/link/section-union/different-syntaxes.out
@@ -1,6 +1,6 @@
error: Section "different syntaxes" is of type ROM0, which cannot be unionized
---
-ERROR: -(18):
+ERROR: <stdin>(18):
Cannot declare ROM sections as UNION
-ERROR: -(18):
+FATAL: <stdin>(18):
Cannot create section "different syntaxes" (1 errors)
--- a/test/link/section-union/org-conflict.out
+++ b/test/link/section-union/org-conflict.out
@@ -1,6 +1,6 @@
error: Section "conflicting address" is defined with conflicting addresses $beef and $babe
---
-ERROR: -(16):
+ERROR: <stdin>(16):
Section "conflicting address" already declared as fixed at different address $beef
-ERROR: -(16):
+FATAL: <stdin>(16):
Cannot create section "conflicting address" (1 errors)
--- a/test/link/section-union/split-data.out
+++ b/test/link/section-union/split-data.out
@@ -1,6 +1,6 @@
error: Section "mutually-overlaid data" is of type ROM0, which cannot be unionized
---
-ERROR: -(18):
+ERROR: <stdin>(18):
Cannot declare ROM sections as UNION
-ERROR: -(18):
+FATAL: <stdin>(18):
Cannot create section "mutually-overlaid data" (1 errors)