diff options
author | Jo-Philipp Wich <jo@mein.io> | 2022-01-13 16:06:17 +0100 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2022-01-18 10:58:11 +0100 |
commit | 6c2caf9fbb9d346cfb20cd5c83875fdff77e584c (patch) | |
tree | 4d0fe816584e8f351ed0f1da8be0b9ccf1c5635f | |
parent | 725bb75b7b66dd1e0a381908e831cede0402cb6e (diff) |
source: refactor source file handling
- Move the source object pointer into the program entity, which is
referenced by each function
- Move lineinfo-related routines into source.c and use them from lexer.c,
since lineinfo encoding does not belong in the lexical analyzer.
- Implement initial infrastructure for detecting the source file type;
this is required later to differentiate between plaintext and
precompiled bytecode files.
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- | compiler.c | 59 | ||||
-rw-r--r-- | include/ucode/program.h | 4 | ||||
-rw-r--r-- | include/ucode/source.h | 9 | ||||
-rw-r--r-- | include/ucode/types.h | 4 | ||||
-rw-r--r-- | lexer.c | 93 | ||||
-rw-r--r-- | lib.c | 4 | ||||
-rw-r--r-- | program.c | 10 | ||||
-rw-r--r-- | source.c | 82 | ||||
-rw-r--r-- | types.c | 4 | ||||
-rw-r--r-- | vm.c | 24 |
10 files changed, 160 insertions, 133 deletions
@@ -114,7 +114,7 @@ uc_compiler_exprstack_is(uc_compiler_t *compiler, uc_exprflag_t flag) } static void -uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_source_t *source, uc_program_t *program, bool strict) +uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_program_t *program, bool strict) { uc_value_t *varname = ucv_string_new("(callee)"); uc_function_t *fn; @@ -122,7 +122,7 @@ uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_so compiler->scope_depth = 0; compiler->program = program; - compiler->function = uc_program_function_new(program, name, srcpos, source); + compiler->function = uc_program_function_new(program, name, srcpos); compiler->locals.count = 0; compiler->locals.entries = NULL; @@ -156,9 +156,7 @@ uc_compiler_current_chunk(uc_compiler_t *compiler) static uc_source_t * uc_compiler_current_source(uc_compiler_t *compiler) { - uc_function_t *fn = (uc_function_t *)compiler->function; - - return fn->source; + return compiler->program->source; } __attribute__((format(printf, 3, 0))) static void @@ -1132,7 +1130,7 @@ uc_compiler_compile_arrowfn(uc_compiler_t *compiler, uc_value_t *args, bool rest pos = compiler->parser->prev.pos; uc_compiler_init(&fncompiler, NULL, compiler->parser->prev.pos, - uc_compiler_current_source(compiler), compiler->program, + compiler->program, uc_compiler_is_strict(compiler)); fncompiler.parent = compiler; @@ -1562,7 +1560,7 @@ uc_compiler_compile_function(uc_compiler_t *compiler) uc_compiler_init(&fncompiler, name ? 
ucv_string_get(name) : NULL, compiler->parser->prev.pos, - uc_compiler_current_source(compiler), compiler->program, + compiler->program, uc_compiler_is_strict(compiler)); fncompiler.parent = compiler; @@ -2873,29 +2871,42 @@ uc_compile(uc_parse_config_t *config, uc_source_t *source, char **errp) uc_exprstack_t expr = { .token = TK_EOF }; uc_parser_t parser = { .config = config }; uc_compiler_t compiler = { .parser = &parser, .exprstack = &expr }; - uc_program_t *prog = uc_program_new(); - uc_function_t *fn; + uc_function_t *fn = NULL; + uc_program_t *prog; - uc_lexer_init(&parser.lex, config, source); - uc_compiler_init(&compiler, "main", 0, source, prog, - config && config->strict_declarations); + switch (uc_source_type_test(source)) { + case UC_SOURCE_TYPE_PLAIN: + prog = uc_program_new(source); - uc_compiler_parse_advance(&compiler); + uc_lexer_init(&parser.lex, config, source); + uc_compiler_init(&compiler, "main", 0, prog, + config && config->strict_declarations); - while (!uc_compiler_parse_match(&compiler, TK_EOF)) - uc_compiler_compile_declaration(&compiler); + uc_compiler_parse_advance(&compiler); - fn = uc_compiler_finish(&compiler); + while (!uc_compiler_parse_match(&compiler, TK_EOF)) + uc_compiler_compile_declaration(&compiler); - if (errp) { - *errp = parser.error ? parser.error->buf : NULL; - free(parser.error); - } - else { - printbuf_free(parser.error); - } + fn = uc_compiler_finish(&compiler); - uc_lexer_free(&parser.lex); + if (errp) { + *errp = parser.error ? 
parser.error->buf : NULL; + free(parser.error); + } + else { + printbuf_free(parser.error); + } + + uc_lexer_free(&parser.lex); + + break; + + default: + if (errp) + xasprintf(errp, "Unrecognized source type\n"); + + break; + } return fn; } diff --git a/include/ucode/program.h b/include/ucode/program.h index 9bbc67e..0f56f99 100644 --- a/include/ucode/program.h +++ b/include/ucode/program.h @@ -20,11 +20,11 @@ #include "types.h" -uc_program_t *uc_program_new(void); +uc_program_t *uc_program_new(uc_source_t *); void uc_program_free(uc_program_t *); -uc_value_t *uc_program_function_new(uc_program_t *, const char *, size_t, uc_source_t *); +uc_value_t *uc_program_function_new(uc_program_t *, const char *, size_t); size_t uc_program_function_id(uc_program_t *, uc_value_t *); uc_value_t *uc_program_function_load(uc_program_t *, size_t); diff --git a/include/ucode/source.h b/include/ucode/source.h index 3de7c93..e7a5667 100644 --- a/include/ucode/source.h +++ b/include/ucode/source.h @@ -25,6 +25,10 @@ #include "types.h" +typedef enum { + UC_SOURCE_TYPE_PLAIN = 0, +} uc_source_type_t; + uc_source_t *uc_source_new_file(const char *path); uc_source_t *uc_source_new_buffer(const char *name, char *buf, size_t len); @@ -33,4 +37,9 @@ size_t uc_source_get_line(uc_source_t *source, size_t *offset); uc_source_t *uc_source_get(uc_source_t *source); void uc_source_put(uc_source_t *source); +uc_source_type_t uc_source_type_test(uc_source_t *source); + +void uc_source_line_next(uc_source_t *source); +void uc_source_line_update(uc_source_t *source, size_t off); + #endif /* __SOURCE_H_ */ diff --git a/include/ucode/types.h b/include/ucode/types.h index be10ac5..66db5ea 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -154,7 +154,6 @@ typedef struct uc_function { size_t nupvals; size_t srcpos; uc_chunk_t chunk; - uc_source_t *source; struct uc_program *program; uc_weakref_t progref; char name[]; @@ -205,6 +204,7 @@ uc_declare_vector(uc_resource_types_t, 
uc_resource_type_t *); typedef struct uc_program { uc_value_list_t constants; uc_weakref_t functions; + uc_source_t *source; } uc_program_t; @@ -350,7 +350,7 @@ size_t ucv_object_length(uc_value_t *); : 0); \ entry##key = entry_next##key) -uc_value_t *ucv_function_new(const char *, size_t, uc_source_t *, uc_program_t *); +uc_value_t *ucv_function_new(const char *, size_t, uc_program_t *); size_t ucv_function_srcpos(uc_value_t *, size_t); uc_value_t *ucv_cfunction_new(const char *, uc_cfn_ptr_t); @@ -278,65 +278,14 @@ _buf_startswith(uc_lexer_t *lex, const char *str, size_t len) { #define buf_startswith(s, str) _buf_startswith(s, str, sizeof(str) - 1) -/* lineinfo is encoded in bytes: the most significant bit specifies whether - * to advance the line count by one or not, while the remaining 7 bits encode - * the amounts of bytes on the current line. - * - * If a line has more than 127 characters, the first byte will be set to - * 0xff (1 1111111) and subsequent bytes will encode the remaining characters - * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus - * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:1111111). - * - * The newline character itself is not counted, so an empty line is encoded as - * 0x80 (1:0000000). - */ - -static void -next_lineinfo(uc_lexer_t *lex) -{ - uc_lineinfo_t *lines = &lex->source->lineinfo; - - uc_vector_grow(lines); - lines->entries[lines->count++] = 0x80; -} - -static void -update_lineinfo(uc_lexer_t *lex, size_t off) -{ - uc_lineinfo_t *lines = &lex->source->lineinfo; - uint8_t *entry, n; - - entry = uc_vector_last(lines); - - if ((entry[0] & 0x7f) + off <= 0x7f) { - entry[0] += off; - } - else { - off -= (0x7f - (entry[0] & 0x7f)); - entry[0] |= 0x7f; - - while (off > 0) { - n = (off > 0x7f) ? 
0x7f : off; - uc_vector_grow(lines); - entry = uc_vector_last(lines); - entry[1] = n; - off -= n; - lines->count++; - } - } -} - static void buf_consume(uc_lexer_t *lex, size_t len) { size_t i, linelen; - if (!lex->source->lineinfo.count) - next_lineinfo(lex); - for (i = 0, linelen = 0; i < len; i++) { if (lex->bufstart[i] == '\n') { - update_lineinfo(lex, linelen); - next_lineinfo(lex); + uc_source_line_update(lex->source, linelen); + uc_source_line_next(lex->source); linelen = 0; } @@ -346,7 +295,7 @@ buf_consume(uc_lexer_t *lex, size_t len) { } if (linelen) - update_lineinfo(lex, linelen); + uc_source_line_update(lex->source, linelen); lex->bufstart += len; lex->source->off += len; @@ -1120,38 +1069,6 @@ lex_step(uc_lexer_t *lex, FILE *fp) return NULL; } -static void -uc_lexer_skip_shebang(uc_lexer_t *lex) -{ - uc_source_t *source = lex->source; - FILE *fp = source->fp; - int c1, c2; - - c1 = fgetc(fp); - c2 = fgetc(fp); - - if (c1 == '#' && c2 == '!') { - next_lineinfo(lex); - - source->off += 2; - - while ((c1 = fgetc(fp)) != EOF) { - source->off++; - - if (c1 == '\n') { - update_lineinfo(lex, source->off); - next_lineinfo(lex); - - break; - } - } - } - else { - ungetc(c2, fp); - ungetc(c1, fp); - } -} - void uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source) { @@ -1187,10 +1104,6 @@ uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source) lex->state = UC_LEX_IDENTIFY_TOKEN; lex->block = STATEMENTS; } - - /* Skip any potential interpreter line */ - if (lex->source->off == 0) - uc_lexer_skip_shebang(lex); } void @@ -2093,7 +2093,7 @@ uc_include(uc_vm_t *vm, size_t nargs) if (!closure) return NULL; - p = include_path(closure->function->source->filename, ucv_string_get(path)); + p = include_path(closure->function->program->source->filename, ucv_string_get(path)); if (!p) { uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, @@ -2505,7 +2505,7 @@ uc_sourcepath(uc_vm_t *vm, size_t nargs) continue; } - path = 
realpath(frame->closure->function->source->filename, NULL); + path = realpath(frame->closure->function->program->source->filename, NULL); break; } @@ -15,11 +15,12 @@ */ #include "ucode/program.h" +#include "ucode/source.h" #include "ucode/vallist.h" uc_program_t * -uc_program_new(void) +uc_program_new(uc_source_t *source) { uc_program_t *prog; @@ -28,6 +29,8 @@ uc_program_new(void) prog->functions.next = &prog->functions; prog->functions.prev = &prog->functions; + prog->source = uc_source_get(source); + uc_vallist_init(&prog->constants); return prog; @@ -64,15 +67,16 @@ uc_program_free(uc_program_t *prog) } uc_vallist_free(&prog->constants); + uc_source_put(prog->source); free(prog); } uc_value_t * -uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos, uc_source_t *source) +uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos) { uc_function_t *func; - func = (uc_function_t *)ucv_function_new(name, srcpos, source, prog); + func = (uc_function_t *)ucv_function_new(name, srcpos, prog); func->root = (prog->functions.next == &prog->functions); ucv_ref(&prog->functions, &func->progref); @@ -15,6 +15,7 @@ */ #include <string.h> +#include <errno.h> #include "ucode/source.h" @@ -116,3 +117,84 @@ uc_source_put(uc_source_t *source) free(source->buffer); free(source); } + +uc_source_type_t +uc_source_type_test(uc_source_t *source) +{ + union { char s[sizeof(uint32_t)]; uint32_t n; } buf; + uc_source_type_t type = UC_SOURCE_TYPE_PLAIN; + FILE *fp = source->fp; + int c; + + if (fread(buf.s, 1, 2, fp) == 2 && !strncmp(buf.s, "#!", 2)) { + source->off += 2; + + while ((c = fgetc(fp)) != EOF) { + source->off++; + + if (c == '\n') { + uc_source_line_update(source, source->off); + uc_source_line_next(source); + + break; + } + } + } + else { + if (fseek(fp, 0L, SEEK_SET) == -1) + fprintf(stderr, "Failed to rewind source buffer: %s\n", strerror(errno)); + } + + return type; +} + +/* lineinfo is encoded in bytes: the most significant bit 
specifies whether + * to advance the line count by one or not, while the remaining 7 bits encode + * the amounts of bytes on the current line. + * + * If a line has more than 127 characters, the first byte will be set to + * 0xff (1 1111111) and subsequent bytes will encode the remaining characters + * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus + * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:1111111). + * + * The newline character itself is not counted, so an empty line is encoded as + * 0x80 (1:0000000). + */ + +void +uc_source_line_next(uc_source_t *source) +{ + uc_lineinfo_t *lines = &source->lineinfo; + + uc_vector_grow(lines); + lines->entries[lines->count++] = 0x80; +} + +void +uc_source_line_update(uc_source_t *source, size_t off) +{ + uc_lineinfo_t *lines = &source->lineinfo; + uint8_t *entry, n; + + if (!lines->count) + uc_source_line_next(source); + + entry = uc_vector_last(lines); + + if ((entry[0] & 0x7f) + off <= 0x7f) { + entry[0] += off; + } + else { + off -= (0x7f - (entry[0] & 0x7f)); + entry[0] |= 0x7f; + + while (off > 0) { + n = (off > 0x7f) ? 
0x7f : off; + uc_vector_grow(lines); + entry = uc_vector_last(lines); + entry[1] = n; + off -= n; + lines->count++; + } + } +} @@ -248,7 +248,6 @@ ucv_free(uc_value_t *uv, bool retain) } uc_chunk_free(&function->chunk); - uc_source_put(function->source); break; case UC_CLOSURE: @@ -951,7 +950,7 @@ ucv_object_length(uc_value_t *uv) uc_value_t * -ucv_function_new(const char *name, size_t srcpos, uc_source_t *source, uc_program_t *program) +ucv_function_new(const char *name, size_t srcpos, uc_program_t *program) { size_t namelen = 0; uc_function_t *fn; @@ -969,7 +968,6 @@ ucv_function_new(const char *name, size_t srcpos, uc_source_t *source, uc_progra fn->nargs = 0; fn->nupvals = 0; fn->srcpos = srcpos; - fn->source = uc_source_get(source); fn->program = program; fn->vararg = false; @@ -208,6 +208,12 @@ uc_vm_frame_program(uc_callframe_t *frame) return frame->closure ? frame->closure->function->program : NULL; } +static uc_source_t * +uc_vm_frame_source(uc_callframe_t *frame) +{ + return frame->closure ? 
frame->closure->function->program->source : NULL; +} + static uc_callframe_t * uc_vm_current_frame(uc_vm_t *vm) { @@ -586,18 +592,20 @@ uc_dump_insn(uc_vm_t *vm, uint8_t *pos, uc_vm_insn_t insn) uc_chunk_t *chunk = uc_vm_frame_chunk(frame); uc_stringbuf_t *buf = NULL; uc_value_t *cnst = NULL; + uc_source_t *source; size_t srcpos; srcpos = ucv_function_srcpos(&frame->closure->function->header, pos - chunk->entries); + source = uc_vm_frame_source(frame); - if (last_srcpos == 0 || last_source != frame->closure->function->source || srcpos != last_srcpos) { + if (last_srcpos == 0 || last_source != source || srcpos != last_srcpos) { buf = xprintbuf_new(); - uc_source_context_format(buf, frame->closure->function->source, srcpos, true); + uc_source_context_format(buf, source, srcpos, true); fwrite(buf->buf, 1, printbuf_length(buf), stderr); printbuf_free(buf); - last_source = frame->closure->function->source; + last_source = source; last_srcpos = srcpos; } @@ -802,6 +810,7 @@ uc_vm_capture_stacktrace(uc_vm_t *vm, size_t i) uc_value_t *stacktrace, *entry, *last = NULL; uc_function_t *function; uc_callframe_t *frame; + uc_source_t *source; size_t off, srcpos; char *name; @@ -813,12 +822,13 @@ uc_vm_capture_stacktrace(uc_vm_t *vm, size_t i) if (frame->closure) { function = frame->closure->function; + source = function->program->source; off = (frame->ip - uc_vm_frame_chunk(frame)->entries) - 1; srcpos = ucv_function_srcpos(&function->header, off); - ucv_object_add(entry, "filename", ucv_string_new(function->source->filename)); - ucv_object_add(entry, "line", ucv_int64_new(uc_source_get_line(function->source, &srcpos))); + ucv_object_add(entry, "filename", ucv_string_new(source->filename)); + ucv_object_add(entry, "line", ucv_int64_new(uc_source_get_line(source, &srcpos))); ucv_object_add(entry, "byte", ucv_int64_new(srcpos)); } @@ -876,7 +886,7 @@ uc_vm_get_error_context(uc_vm_t *vm) buf = ucv_stringbuf_new(); if (offset) - uc_error_context_format(buf, 
frame->closure->function->source, stacktrace, offset); + uc_error_context_format(buf, uc_vm_frame_source(frame), stacktrace, offset); else if (frame->ip != chunk->entries) ucv_stringbuf_printf(buf, "At instruction %zu", (frame->ip - chunk->entries) - 1); else @@ -2560,7 +2570,7 @@ uc_vm_execute(uc_vm_t *vm, uc_function_t *fn, uc_value_t **retval) if (vm->trace) { buf = xprintbuf_new(); - uc_source_context_format(buf, fn->source, 0, true); + uc_source_context_format(buf, uc_vm_frame_source(frame), 0, true); fwrite(buf->buf, 1, printbuf_length(buf), stderr); printbuf_free(buf); |