summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Jo-Philipp Wich <jo@mein.io> 2022-01-13 16:06:17 +0100
committer Jo-Philipp Wich <jo@mein.io> 2022-01-18 10:58:11 +0100
commit 6c2caf9fbb9d346cfb20cd5c83875fdff77e584c (patch)
tree 4d0fe816584e8f351ed0f1da8be0b9ccf1c5635f
parent 725bb75b7b66dd1e0a381908e831cede0402cb6e (diff)
source: refactor source file handling
- Move source object pointer into program entity which is referenced by each function
- Move lineinfo related routines into source.c and use them from lexer.c, since lineinfo encoding does not belong in the lexical analyzer.
- Implement initial infrastructure for detecting the source file type; this is required later to differentiate between plaintext and precompiled bytecode files

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- compiler.c 59
-rw-r--r-- include/ucode/program.h 4
-rw-r--r-- include/ucode/source.h 9
-rw-r--r-- include/ucode/types.h 4
-rw-r--r-- lexer.c 93
-rw-r--r-- lib.c 4
-rw-r--r-- program.c 10
-rw-r--r-- source.c 82
-rw-r--r-- types.c 4
-rw-r--r-- vm.c 24
10 files changed, 160 insertions, 133 deletions
diff --git a/compiler.c b/compiler.c
index 97962b1..a4c7381 100644
--- a/compiler.c
+++ b/compiler.c
@@ -114,7 +114,7 @@ uc_compiler_exprstack_is(uc_compiler_t *compiler, uc_exprflag_t flag)
}
static void
-uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_source_t *source, uc_program_t *program, bool strict)
+uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_program_t *program, bool strict)
{
uc_value_t *varname = ucv_string_new("(callee)");
uc_function_t *fn;
@@ -122,7 +122,7 @@ uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_so
compiler->scope_depth = 0;
compiler->program = program;
- compiler->function = uc_program_function_new(program, name, srcpos, source);
+ compiler->function = uc_program_function_new(program, name, srcpos);
compiler->locals.count = 0;
compiler->locals.entries = NULL;
@@ -156,9 +156,7 @@ uc_compiler_current_chunk(uc_compiler_t *compiler)
static uc_source_t *
uc_compiler_current_source(uc_compiler_t *compiler)
{
- uc_function_t *fn = (uc_function_t *)compiler->function;
-
- return fn->source;
+ return compiler->program->source;
}
__attribute__((format(printf, 3, 0))) static void
@@ -1132,7 +1130,7 @@ uc_compiler_compile_arrowfn(uc_compiler_t *compiler, uc_value_t *args, bool rest
pos = compiler->parser->prev.pos;
uc_compiler_init(&fncompiler, NULL, compiler->parser->prev.pos,
- uc_compiler_current_source(compiler), compiler->program,
+ compiler->program,
uc_compiler_is_strict(compiler));
fncompiler.parent = compiler;
@@ -1562,7 +1560,7 @@ uc_compiler_compile_function(uc_compiler_t *compiler)
uc_compiler_init(&fncompiler,
name ? ucv_string_get(name) : NULL, compiler->parser->prev.pos,
- uc_compiler_current_source(compiler), compiler->program,
+ compiler->program,
uc_compiler_is_strict(compiler));
fncompiler.parent = compiler;
@@ -2873,29 +2871,42 @@ uc_compile(uc_parse_config_t *config, uc_source_t *source, char **errp)
uc_exprstack_t expr = { .token = TK_EOF };
uc_parser_t parser = { .config = config };
uc_compiler_t compiler = { .parser = &parser, .exprstack = &expr };
- uc_program_t *prog = uc_program_new();
- uc_function_t *fn;
+ uc_function_t *fn = NULL;
+ uc_program_t *prog;
- uc_lexer_init(&parser.lex, config, source);
- uc_compiler_init(&compiler, "main", 0, source, prog,
- config && config->strict_declarations);
+ switch (uc_source_type_test(source)) {
+ case UC_SOURCE_TYPE_PLAIN:
+ prog = uc_program_new(source);
- uc_compiler_parse_advance(&compiler);
+ uc_lexer_init(&parser.lex, config, source);
+ uc_compiler_init(&compiler, "main", 0, prog,
+ config && config->strict_declarations);
- while (!uc_compiler_parse_match(&compiler, TK_EOF))
- uc_compiler_compile_declaration(&compiler);
+ uc_compiler_parse_advance(&compiler);
- fn = uc_compiler_finish(&compiler);
+ while (!uc_compiler_parse_match(&compiler, TK_EOF))
+ uc_compiler_compile_declaration(&compiler);
- if (errp) {
- *errp = parser.error ? parser.error->buf : NULL;
- free(parser.error);
- }
- else {
- printbuf_free(parser.error);
- }
+ fn = uc_compiler_finish(&compiler);
- uc_lexer_free(&parser.lex);
+ if (errp) {
+ *errp = parser.error ? parser.error->buf : NULL;
+ free(parser.error);
+ }
+ else {
+ printbuf_free(parser.error);
+ }
+
+ uc_lexer_free(&parser.lex);
+
+ break;
+
+ default:
+ if (errp)
+ xasprintf(errp, "Unrecognized source type\n");
+
+ break;
+ }
return fn;
}
diff --git a/include/ucode/program.h b/include/ucode/program.h
index 9bbc67e..0f56f99 100644
--- a/include/ucode/program.h
+++ b/include/ucode/program.h
@@ -20,11 +20,11 @@
#include "types.h"
-uc_program_t *uc_program_new(void);
+uc_program_t *uc_program_new(uc_source_t *);
void uc_program_free(uc_program_t *);
-uc_value_t *uc_program_function_new(uc_program_t *, const char *, size_t, uc_source_t *);
+uc_value_t *uc_program_function_new(uc_program_t *, const char *, size_t);
size_t uc_program_function_id(uc_program_t *, uc_value_t *);
uc_value_t *uc_program_function_load(uc_program_t *, size_t);
diff --git a/include/ucode/source.h b/include/ucode/source.h
index 3de7c93..e7a5667 100644
--- a/include/ucode/source.h
+++ b/include/ucode/source.h
@@ -25,6 +25,10 @@
#include "types.h"
+typedef enum {
+ UC_SOURCE_TYPE_PLAIN = 0,
+} uc_source_type_t;
+
uc_source_t *uc_source_new_file(const char *path);
uc_source_t *uc_source_new_buffer(const char *name, char *buf, size_t len);
@@ -33,4 +37,9 @@ size_t uc_source_get_line(uc_source_t *source, size_t *offset);
uc_source_t *uc_source_get(uc_source_t *source);
void uc_source_put(uc_source_t *source);
+uc_source_type_t uc_source_type_test(uc_source_t *source);
+
+void uc_source_line_next(uc_source_t *source);
+void uc_source_line_update(uc_source_t *source, size_t off);
+
#endif /* __SOURCE_H_ */
diff --git a/include/ucode/types.h b/include/ucode/types.h
index be10ac5..66db5ea 100644
--- a/include/ucode/types.h
+++ b/include/ucode/types.h
@@ -154,7 +154,6 @@ typedef struct uc_function {
size_t nupvals;
size_t srcpos;
uc_chunk_t chunk;
- uc_source_t *source;
struct uc_program *program;
uc_weakref_t progref;
char name[];
@@ -205,6 +204,7 @@ uc_declare_vector(uc_resource_types_t, uc_resource_type_t *);
typedef struct uc_program {
uc_value_list_t constants;
uc_weakref_t functions;
+ uc_source_t *source;
} uc_program_t;
@@ -350,7 +350,7 @@ size_t ucv_object_length(uc_value_t *);
: 0); \
entry##key = entry_next##key)
-uc_value_t *ucv_function_new(const char *, size_t, uc_source_t *, uc_program_t *);
+uc_value_t *ucv_function_new(const char *, size_t, uc_program_t *);
size_t ucv_function_srcpos(uc_value_t *, size_t);
uc_value_t *ucv_cfunction_new(const char *, uc_cfn_ptr_t);
diff --git a/lexer.c b/lexer.c
index 7637306..d554c0d 100644
--- a/lexer.c
+++ b/lexer.c
@@ -278,65 +278,14 @@ _buf_startswith(uc_lexer_t *lex, const char *str, size_t len) {
#define buf_startswith(s, str) _buf_startswith(s, str, sizeof(str) - 1)
-/* lineinfo is encoded in bytes: the most significant bit specifies whether
- * to advance the line count by one or not, while the remaining 7 bits encode
- * the amounts of bytes on the current line.
- *
- * If a line has more than 127 characters, the first byte will be set to
- * 0xff (1 1111111) and subsequent bytes will encode the remaining characters
- * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus
- * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:1111111).
- *
- * The newline character itself is not counted, so an empty line is encoded as
- * 0x80 (1:0000000).
- */
-
-static void
-next_lineinfo(uc_lexer_t *lex)
-{
- uc_lineinfo_t *lines = &lex->source->lineinfo;
-
- uc_vector_grow(lines);
- lines->entries[lines->count++] = 0x80;
-}
-
-static void
-update_lineinfo(uc_lexer_t *lex, size_t off)
-{
- uc_lineinfo_t *lines = &lex->source->lineinfo;
- uint8_t *entry, n;
-
- entry = uc_vector_last(lines);
-
- if ((entry[0] & 0x7f) + off <= 0x7f) {
- entry[0] += off;
- }
- else {
- off -= (0x7f - (entry[0] & 0x7f));
- entry[0] |= 0x7f;
-
- while (off > 0) {
- n = (off > 0x7f) ? 0x7f : off;
- uc_vector_grow(lines);
- entry = uc_vector_last(lines);
- entry[1] = n;
- off -= n;
- lines->count++;
- }
- }
-}
-
static void
buf_consume(uc_lexer_t *lex, size_t len) {
size_t i, linelen;
- if (!lex->source->lineinfo.count)
- next_lineinfo(lex);
-
for (i = 0, linelen = 0; i < len; i++) {
if (lex->bufstart[i] == '\n') {
- update_lineinfo(lex, linelen);
- next_lineinfo(lex);
+ uc_source_line_update(lex->source, linelen);
+ uc_source_line_next(lex->source);
linelen = 0;
}
@@ -346,7 +295,7 @@ buf_consume(uc_lexer_t *lex, size_t len) {
}
if (linelen)
- update_lineinfo(lex, linelen);
+ uc_source_line_update(lex->source, linelen);
lex->bufstart += len;
lex->source->off += len;
@@ -1120,38 +1069,6 @@ lex_step(uc_lexer_t *lex, FILE *fp)
return NULL;
}
-static void
-uc_lexer_skip_shebang(uc_lexer_t *lex)
-{
- uc_source_t *source = lex->source;
- FILE *fp = source->fp;
- int c1, c2;
-
- c1 = fgetc(fp);
- c2 = fgetc(fp);
-
- if (c1 == '#' && c2 == '!') {
- next_lineinfo(lex);
-
- source->off += 2;
-
- while ((c1 = fgetc(fp)) != EOF) {
- source->off++;
-
- if (c1 == '\n') {
- update_lineinfo(lex, source->off);
- next_lineinfo(lex);
-
- break;
- }
- }
- }
- else {
- ungetc(c2, fp);
- ungetc(c1, fp);
- }
-}
-
void
uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source)
{
@@ -1187,10 +1104,6 @@ uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source)
lex->state = UC_LEX_IDENTIFY_TOKEN;
lex->block = STATEMENTS;
}
-
- /* Skip any potential interpreter line */
- if (lex->source->off == 0)
- uc_lexer_skip_shebang(lex);
}
void
diff --git a/lib.c b/lib.c
index 7ded088..0e77b23 100644
--- a/lib.c
+++ b/lib.c
@@ -2093,7 +2093,7 @@ uc_include(uc_vm_t *vm, size_t nargs)
if (!closure)
return NULL;
- p = include_path(closure->function->source->filename, ucv_string_get(path));
+ p = include_path(closure->function->program->source->filename, ucv_string_get(path));
if (!p) {
uc_vm_raise_exception(vm, EXCEPTION_RUNTIME,
@@ -2505,7 +2505,7 @@ uc_sourcepath(uc_vm_t *vm, size_t nargs)
continue;
}
- path = realpath(frame->closure->function->source->filename, NULL);
+ path = realpath(frame->closure->function->program->source->filename, NULL);
break;
}
diff --git a/program.c b/program.c
index 5d3a104..f8190e4 100644
--- a/program.c
+++ b/program.c
@@ -15,11 +15,12 @@
*/
#include "ucode/program.h"
+#include "ucode/source.h"
#include "ucode/vallist.h"
uc_program_t *
-uc_program_new(void)
+uc_program_new(uc_source_t *source)
{
uc_program_t *prog;
@@ -28,6 +29,8 @@ uc_program_new(void)
prog->functions.next = &prog->functions;
prog->functions.prev = &prog->functions;
+ prog->source = uc_source_get(source);
+
uc_vallist_init(&prog->constants);
return prog;
@@ -64,15 +67,16 @@ uc_program_free(uc_program_t *prog)
}
uc_vallist_free(&prog->constants);
+ uc_source_put(prog->source);
free(prog);
}
uc_value_t *
-uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos, uc_source_t *source)
+uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos)
{
uc_function_t *func;
- func = (uc_function_t *)ucv_function_new(name, srcpos, source, prog);
+ func = (uc_function_t *)ucv_function_new(name, srcpos, prog);
func->root = (prog->functions.next == &prog->functions);
ucv_ref(&prog->functions, &func->progref);
diff --git a/source.c b/source.c
index b7bb96d..aa73efd 100644
--- a/source.c
+++ b/source.c
@@ -15,6 +15,7 @@
*/
#include <string.h>
+#include <errno.h>
#include "ucode/source.h"
@@ -116,3 +117,84 @@ uc_source_put(uc_source_t *source)
free(source->buffer);
free(source);
}
+
+uc_source_type_t
+uc_source_type_test(uc_source_t *source)
+{
+ union { char s[sizeof(uint32_t)]; uint32_t n; } buf;
+ uc_source_type_t type = UC_SOURCE_TYPE_PLAIN;
+ FILE *fp = source->fp;
+ int c;
+
+ if (fread(buf.s, 1, 2, fp) == 2 && !strncmp(buf.s, "#!", 2)) {
+ source->off += 2;
+
+ while ((c = fgetc(fp)) != EOF) {
+ source->off++;
+
+ if (c == '\n') {
+ uc_source_line_update(source, source->off);
+ uc_source_line_next(source);
+
+ break;
+ }
+ }
+ }
+ else {
+ if (fseek(fp, 0L, SEEK_SET) == -1)
+ fprintf(stderr, "Failed to rewind source buffer: %s\n", strerror(errno));
+ }
+
+ return type;
+}
+
+/* lineinfo is encoded in bytes: the most significant bit specifies whether
+ * to advance the line count by one or not, while the remaining 7 bits encode
+ * the number of bytes on the current line.
+ *
+ * If a line has more than 127 characters, the first byte will be set to
+ * 0xff (1:1111111) and subsequent bytes will encode the remaining characters
+ * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus
+ * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:0010011).
+ *
+ * The newline character itself is not counted, so an empty line is encoded as
+ * 0x80 (1:0000000).
+ */
+
+void
+uc_source_line_next(uc_source_t *source)
+{
+ uc_lineinfo_t *lines = &source->lineinfo;
+
+ uc_vector_grow(lines);
+ lines->entries[lines->count++] = 0x80;
+}
+
+void
+uc_source_line_update(uc_source_t *source, size_t off)
+{
+ uc_lineinfo_t *lines = &source->lineinfo;
+ uint8_t *entry, n;
+
+ if (!lines->count)
+ uc_source_line_next(source);
+
+ entry = uc_vector_last(lines);
+
+ if ((entry[0] & 0x7f) + off <= 0x7f) {
+ entry[0] += off;
+ }
+ else {
+ off -= (0x7f - (entry[0] & 0x7f));
+ entry[0] |= 0x7f;
+
+ while (off > 0) {
+ n = (off > 0x7f) ? 0x7f : off;
+ uc_vector_grow(lines);
+ entry = uc_vector_last(lines);
+ entry[1] = n;
+ off -= n;
+ lines->count++;
+ }
+ }
+}
diff --git a/types.c b/types.c
index 65cba22..68eba42 100644
--- a/types.c
+++ b/types.c
@@ -248,7 +248,6 @@ ucv_free(uc_value_t *uv, bool retain)
}
uc_chunk_free(&function->chunk);
- uc_source_put(function->source);
break;
case UC_CLOSURE:
@@ -951,7 +950,7 @@ ucv_object_length(uc_value_t *uv)
uc_value_t *
-ucv_function_new(const char *name, size_t srcpos, uc_source_t *source, uc_program_t *program)
+ucv_function_new(const char *name, size_t srcpos, uc_program_t *program)
{
size_t namelen = 0;
uc_function_t *fn;
@@ -969,7 +968,6 @@ ucv_function_new(const char *name, size_t srcpos, uc_source_t *source, uc_progra
fn->nargs = 0;
fn->nupvals = 0;
fn->srcpos = srcpos;
- fn->source = uc_source_get(source);
fn->program = program;
fn->vararg = false;
diff --git a/vm.c b/vm.c
index 1766dc6..f6886ba 100644
--- a/vm.c
+++ b/vm.c
@@ -208,6 +208,12 @@ uc_vm_frame_program(uc_callframe_t *frame)
return frame->closure ? frame->closure->function->program : NULL;
}
+static uc_source_t *
+uc_vm_frame_source(uc_callframe_t *frame)
+{
+ return frame->closure ? frame->closure->function->program->source : NULL;
+}
+
static uc_callframe_t *
uc_vm_current_frame(uc_vm_t *vm)
{
@@ -586,18 +592,20 @@ uc_dump_insn(uc_vm_t *vm, uint8_t *pos, uc_vm_insn_t insn)
uc_chunk_t *chunk = uc_vm_frame_chunk(frame);
uc_stringbuf_t *buf = NULL;
uc_value_t *cnst = NULL;
+ uc_source_t *source;
size_t srcpos;
srcpos = ucv_function_srcpos(&frame->closure->function->header, pos - chunk->entries);
+ source = uc_vm_frame_source(frame);
- if (last_srcpos == 0 || last_source != frame->closure->function->source || srcpos != last_srcpos) {
+ if (last_srcpos == 0 || last_source != source || srcpos != last_srcpos) {
buf = xprintbuf_new();
- uc_source_context_format(buf, frame->closure->function->source, srcpos, true);
+ uc_source_context_format(buf, source, srcpos, true);
fwrite(buf->buf, 1, printbuf_length(buf), stderr);
printbuf_free(buf);
- last_source = frame->closure->function->source;
+ last_source = source;
last_srcpos = srcpos;
}
@@ -802,6 +810,7 @@ uc_vm_capture_stacktrace(uc_vm_t *vm, size_t i)
uc_value_t *stacktrace, *entry, *last = NULL;
uc_function_t *function;
uc_callframe_t *frame;
+ uc_source_t *source;
size_t off, srcpos;
char *name;
@@ -813,12 +822,13 @@ uc_vm_capture_stacktrace(uc_vm_t *vm, size_t i)
if (frame->closure) {
function = frame->closure->function;
+ source = function->program->source;
off = (frame->ip - uc_vm_frame_chunk(frame)->entries) - 1;
srcpos = ucv_function_srcpos(&function->header, off);
- ucv_object_add(entry, "filename", ucv_string_new(function->source->filename));
- ucv_object_add(entry, "line", ucv_int64_new(uc_source_get_line(function->source, &srcpos)));
+ ucv_object_add(entry, "filename", ucv_string_new(source->filename));
+ ucv_object_add(entry, "line", ucv_int64_new(uc_source_get_line(source, &srcpos)));
ucv_object_add(entry, "byte", ucv_int64_new(srcpos));
}
@@ -876,7 +886,7 @@ uc_vm_get_error_context(uc_vm_t *vm)
buf = ucv_stringbuf_new();
if (offset)
- uc_error_context_format(buf, frame->closure->function->source, stacktrace, offset);
+ uc_error_context_format(buf, uc_vm_frame_source(frame), stacktrace, offset);
else if (frame->ip != chunk->entries)
ucv_stringbuf_printf(buf, "At instruction %zu", (frame->ip - chunk->entries) - 1);
else
@@ -2560,7 +2570,7 @@ uc_vm_execute(uc_vm_t *vm, uc_function_t *fn, uc_value_t **retval)
if (vm->trace) {
buf = xprintbuf_new();
- uc_source_context_format(buf, fn->source, 0, true);
+ uc_source_context_format(buf, uc_vm_frame_source(frame), 0, true);
fwrite(buf->buf, 1, printbuf_length(buf), stderr);
printbuf_free(buf);