From 6b2e79af9fe6e7d05d31245fc9049540a96d5d31 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Fri, 7 Jan 2022 19:42:12 +0100 Subject: types: add initial infrastructure for function serialization - Introduce a new "program" entity which holds the list of functions created during compilation - Instead of storing pointers to the in-memory function representation in the constant list, store the index of the function within the program's function list - When loading functions from the constant list, retrieve the function by index from the program entity Signed-off-by: Jo-Philipp Wich --- program.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 program.c (limited to 'program.c') diff --git a/program.c b/program.c new file mode 100644 index 0000000..c413f38 --- /dev/null +++ b/program.c @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2022 Jo-Philipp Wich + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "ucode/program.h" + + +uc_program_t * +uc_program_new(void) +{ + uc_program_t *prog; + + prog = xalloc(sizeof(*prog)); + + prog->functions.next = &prog->functions; + prog->functions.prev = &prog->functions; + + return prog; +} + +static inline uc_function_t * +ref_to_function(uc_weakref_t *ref) +{ + return (uc_function_t *)((uintptr_t)ref - offsetof(uc_function_t, progref)); +} + +static inline uc_value_t * +ref_to_uv(uc_weakref_t *ref) +{ + return (uc_value_t *)((uintptr_t)ref - offsetof(uc_function_t, progref)); +} + +void +uc_program_free(uc_program_t *prog) +{ + uc_weakref_t *ref, *tmp; + uc_function_t *func; + + if (!prog) + return; + + for (ref = prog->functions.next, tmp = ref->next; ref != &prog->functions; ref = tmp, tmp = tmp->next) { + func = ref_to_function(ref); + func->program = NULL; + func->progref.next = NULL; + func->progref.prev = NULL; + + ucv_put(&func->header); + } + + free(prog); +} + +uc_value_t * +uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos, uc_source_t *source) +{ + uc_function_t *func; + + func = (uc_function_t *)ucv_function_new(name, srcpos, source, prog); + func->root = (prog->functions.next == &prog->functions); + + ucv_ref(&prog->functions, &func->progref); + + return &func->header; +} + +size_t +uc_program_function_id(uc_program_t *prog, uc_value_t *func) +{ + uc_weakref_t *ref; + size_t i; + + for (ref = prog->functions.prev, i = 1; ref != &prog->functions; ref = ref->prev, i++) + if (ref_to_uv(ref) == func) + return i; + + return 0; +} + +uc_value_t * +uc_program_function_load(uc_program_t *prog, size_t id) +{ + uc_weakref_t *ref; + size_t i; + + for (ref = prog->functions.prev, i = 1; ref != &prog->functions; ref = ref->prev, i++) + if (i == id) + return ref_to_uv(ref); + + return NULL; +} -- cgit v1.2.3 From 725bb75b7b66dd1e0a381908e831cede0402cb6e Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Fri, 7 Jan 2022 20:03:17 +0100 Subject: compiler, vm: use a program wide constant 
list Instead of storing constant values per function, maintain a global program wide list for all constant values within the current compilation unit. Signed-off-by: Jo-Philipp Wich --- chunk.c | 14 -------------- compiler.c | 14 +++++--------- include/ucode/chunk.h | 2 -- include/ucode/program.h | 3 +++ include/ucode/types.h | 2 +- program.c | 16 ++++++++++++++++ vallist.c | 8 +++----- vm.c | 41 ++++++++++++++++++----------------------- 8 files changed, 46 insertions(+), 54 deletions(-) (limited to 'program.c') diff --git a/chunk.c b/chunk.c index ab576da..5dbd1a1 100644 --- a/chunk.c +++ b/chunk.c @@ -45,7 +45,6 @@ uc_chunk_init(uc_chunk_t *chunk) chunk->debuginfo.variables.count = 0; chunk->debuginfo.variables.entries = NULL; - uc_vallist_init(&chunk->constants); uc_vallist_init(&chunk->debuginfo.varnames); } @@ -54,7 +53,6 @@ uc_chunk_free(uc_chunk_t *chunk) { uc_vector_clear(chunk); uc_vector_clear(&chunk->ehranges); - uc_vallist_free(&chunk->constants); uc_vector_clear(&chunk->debuginfo.offsets); uc_vector_clear(&chunk->debuginfo.variables); @@ -136,18 +134,6 @@ uc_chunk_pop(uc_chunk_t *chunk) } } -uc_value_t * -uc_chunk_get_constant(uc_chunk_t *chunk, size_t idx) -{ - return uc_vallist_get(&chunk->constants, idx); -} - -ssize_t -uc_chunk_add_constant(uc_chunk_t *chunk, uc_value_t *val) -{ - return uc_vallist_add(&chunk->constants, val); -} - size_t uc_chunk_debug_get_srcpos(uc_chunk_t *chunk, size_t off) { diff --git a/compiler.c b/compiler.c index 745707b..97962b1 100644 --- a/compiler.c +++ b/compiler.c @@ -483,8 +483,7 @@ uc_compiler_set_u32(uc_compiler_t *compiler, size_t off, uint32_t n) static size_t uc_compiler_emit_constant(uc_compiler_t *compiler, size_t srcpos, uc_value_t *val) { - uc_chunk_t *chunk = uc_compiler_current_chunk(compiler); - size_t cidx = uc_chunk_add_constant(chunk, val); + size_t cidx = uc_program_add_constant(compiler->program, val); uc_compiler_emit_insn(compiler, srcpos, I_LOAD); uc_compiler_emit_u32(compiler, 0, cidx); @@ 
-495,8 +494,7 @@ uc_compiler_emit_constant(uc_compiler_t *compiler, size_t srcpos, uc_value_t *va static size_t uc_compiler_emit_regexp(uc_compiler_t *compiler, size_t srcpos, uc_value_t *val) { - uc_chunk_t *chunk = uc_compiler_current_chunk(compiler); - size_t cidx = uc_chunk_add_constant(chunk, val); + size_t cidx = uc_program_add_constant(compiler->program, val); uc_compiler_emit_insn(compiler, srcpos, I_LREXP); uc_compiler_emit_u32(compiler, 0, cidx); @@ -1086,7 +1084,7 @@ uc_compiler_emit_variable_rw(uc_compiler_t *compiler, uc_value_t *varname, uc_to ((sub_insn & 0xff) << 24) | idx); } else { - idx = uc_chunk_add_constant(uc_compiler_current_chunk(compiler), varname); + idx = uc_program_add_constant(compiler->program, varname); insn = sub_insn ? I_UVAR : (type ? I_SVAR : I_LVAR); uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, insn); @@ -1195,8 +1193,7 @@ uc_compiler_compile_arrowfn(uc_compiler_t *compiler, uc_value_t *args, bool rest if (fn) uc_compiler_set_u32(compiler, load_off, - uc_chunk_add_constant(uc_compiler_current_chunk(compiler), - &fn->header)); + uc_program_add_constant(compiler->program, &fn->header)); return true; } @@ -1635,8 +1632,7 @@ uc_compiler_compile_function(uc_compiler_t *compiler) if (fn) uc_compiler_set_u32(compiler, load_off, - uc_chunk_add_constant(uc_compiler_current_chunk(compiler), - &fn->header)); + uc_program_add_constant(compiler->program, &fn->header)); /* if a local variable of the same name already existed, overwrite its value * with the compiled function here */ diff --git a/include/ucode/chunk.h b/include/ucode/chunk.h index 0005e3c..6804eeb 100644 --- a/include/ucode/chunk.h +++ b/include/ucode/chunk.h @@ -28,8 +28,6 @@ void uc_chunk_init(uc_chunk_t *chunk); void uc_chunk_free(uc_chunk_t *chunk); size_t uc_chunk_add(uc_chunk_t *chunk, uint8_t byte, size_t line); -ssize_t uc_chunk_add_constant(uc_chunk_t *chunk, uc_value_t *value); -uc_value_t *uc_chunk_get_constant(uc_chunk_t *chunk, size_t idx); void 
uc_chunk_pop(uc_chunk_t *chunk); size_t uc_chunk_debug_get_srcpos(uc_chunk_t *chunk, size_t off); diff --git a/include/ucode/program.h b/include/ucode/program.h index 19b3c9f..9bbc67e 100644 --- a/include/ucode/program.h +++ b/include/ucode/program.h @@ -28,4 +28,7 @@ uc_value_t *uc_program_function_new(uc_program_t *, const char *, size_t, uc_sou size_t uc_program_function_id(uc_program_t *, uc_value_t *); uc_value_t *uc_program_function_load(uc_program_t *, size_t); +uc_value_t *uc_program_get_constant(uc_program_t *, size_t); +ssize_t uc_program_add_constant(uc_program_t *, uc_value_t *); + #endif /* __PROGRAM_H_ */ diff --git a/include/ucode/types.h b/include/ucode/types.h index 7bd0ea9..be10ac5 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -90,7 +90,6 @@ uc_declare_vector(uc_offsetinfo_t, uint8_t); typedef struct { size_t count; uint8_t *entries; - uc_value_list_t constants; uc_ehranges_t ehranges; struct { uc_variables_t variables; @@ -204,6 +203,7 @@ uc_declare_vector(uc_resource_types_t, uc_resource_type_t *); /* Program structure definitions */ typedef struct uc_program { + uc_value_list_t constants; uc_weakref_t functions; } uc_program_t; diff --git a/program.c b/program.c index c413f38..5d3a104 100644 --- a/program.c +++ b/program.c @@ -15,6 +15,7 @@ */ #include "ucode/program.h" +#include "ucode/vallist.h" uc_program_t * @@ -27,6 +28,8 @@ uc_program_new(void) prog->functions.next = &prog->functions; prog->functions.prev = &prog->functions; + uc_vallist_init(&prog->constants); + return prog; } @@ -60,6 +63,7 @@ uc_program_free(uc_program_t *prog) ucv_put(&func->header); } + uc_vallist_free(&prog->constants); free(prog); } @@ -101,3 +105,15 @@ uc_program_function_load(uc_program_t *prog, size_t id) return NULL; } + +uc_value_t * +uc_program_get_constant(uc_program_t *prog, size_t idx) +{ + return uc_vallist_get(&prog->constants, idx); +} + +ssize_t +uc_program_add_constant(uc_program_t *prog, uc_value_t *val) +{ + return 
uc_vallist_add(&prog->constants, val); +} diff --git a/vallist.c b/vallist.c index abf29ad..d7826a0 100644 --- a/vallist.c +++ b/vallist.c @@ -553,8 +553,7 @@ uc_value_t * uc_vallist_get(uc_value_list_t *list, size_t idx) { char str[sizeof(TAG_TYPE)]; - uc_function_t *func; - uc_chunk_t *chunk; + uc_program_t *program; size_t n, len; switch (uc_vallist_type(list, idx)) { @@ -593,10 +592,9 @@ uc_vallist_get(uc_value_list_t *list, size_t idx) return ucv_string_new_length(list->data + TAG_GET_OFFSET(list->index[idx]) + sizeof(uint32_t), len); case TAG_FUNC: - chunk = container_of(list, uc_chunk_t, constants); - func = container_of(chunk, uc_function_t, chunk); + program = container_of(list, uc_program_t, constants); - return uc_program_function_load(func->program, TAG_GET_NV(list->index[idx])); + return uc_program_function_load(program, TAG_GET_NV(list->index[idx])); default: return NULL; diff --git a/vm.c b/vm.c index b8cf50f..1766dc6 100644 --- a/vm.c +++ b/vm.c @@ -24,6 +24,7 @@ #include "ucode/vm.h" #include "ucode/compiler.h" +#include "ucode/program.h" #include "ucode/lib.h" /* uc_error_context_format() */ #undef __insn @@ -201,16 +202,22 @@ uc_vm_frame_chunk(uc_callframe_t *frame) return frame->closure ? &frame->closure->function->chunk : NULL; } +static uc_program_t * +uc_vm_frame_program(uc_callframe_t *frame) +{ + return frame->closure ? 
frame->closure->function->program : NULL; +} + static uc_callframe_t * uc_vm_current_frame(uc_vm_t *vm) { return uc_vector_last(&vm->callframes); } -static uc_chunk_t * -uc_vm_current_chunk(uc_vm_t *vm) +static uc_program_t * +uc_vm_current_program(uc_vm_t *vm) { - return uc_vm_frame_chunk(uc_vm_current_frame(vm)); + return uc_vm_frame_program(uc_vm_current_frame(vm)); } static bool @@ -319,18 +326,6 @@ uc_vm_frame_dump(uc_vm_t *vm, uc_callframe_t *frame) uc_vm_format_val(vm, frame->ctx)); if (chunk) { - fprintf(stderr, " |- %zu constants\n", - chunk->constants.isize); - - for (i = 0; i < chunk->constants.isize; i++) { - v = uc_chunk_get_constant(chunk, i); - - fprintf(stderr, " | [%zu] %s\n", - i, uc_vm_format_val(vm, v)); - - ucv_put(v); - } - closure = frame->closure; function = closure->function; @@ -649,7 +644,7 @@ uc_dump_insn(uc_vm_t *vm, uint8_t *pos, uc_vm_insn_t insn) case I_LOAD: case I_LVAR: case I_SVAR: - cnst = uc_chunk_get_constant(uc_vm_frame_chunk(uc_vector_last(&vm->callframes)), vm->arg.u32); + cnst = uc_program_get_constant(uc_vm_frame_program(uc_vector_last(&vm->callframes)), vm->arg.u32); fprintf(stderr, "\t; %s", cnst ? uc_vm_format_val(vm, cnst) : "(?)"); @@ -676,7 +671,7 @@ uc_dump_insn(uc_vm_t *vm, uint8_t *pos, uc_vm_insn_t insn) case I_UVAR: if (!cnst) - cnst = uc_chunk_get_constant(uc_vm_frame_chunk(uc_vector_last(&vm->callframes)), vm->arg.u32 & 0x00ffffff); + cnst = uc_program_get_constant(uc_vm_frame_program(uc_vector_last(&vm->callframes)), vm->arg.u32 & 0x00ffffff); fprintf(stderr, "\t; %s (%s)", cnst ? 
uc_vm_format_val(vm, cnst) : "(?)", @@ -915,7 +910,7 @@ uc_vm_insn_load(uc_vm_t *vm, uc_vm_insn_t insn) { switch (insn) { case I_LOAD: - uc_vm_stack_push(vm, uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32)); + uc_vm_stack_push(vm, uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32)); break; case I_LOAD8: @@ -938,7 +933,7 @@ uc_vm_insn_load(uc_vm_t *vm, uc_vm_insn_t insn) static void uc_vm_insn_load_regexp(uc_vm_t *vm, uc_vm_insn_t insn) { - uc_value_t *re, *jstr = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + uc_value_t *re, *jstr = uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32); bool icase = false, newline = false, global = false; char *str, *err = NULL; @@ -987,7 +982,7 @@ uc_vm_insn_load_var(uc_vm_t *vm, uc_vm_insn_t insn) bool found; scope = vm->globals; - name = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + name = uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32); while (ucv_type(name) == UC_STRING) { val = ucv_object_get(scope, ucv_string_get(name), &found); @@ -1128,7 +1123,7 @@ static void uc_vm_insn_load_closure(uc_vm_t *vm, uc_vm_insn_t insn) { uc_callframe_t *frame = uc_vm_current_frame(vm); - uc_value_t *fno = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + uc_value_t *fno = uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32); uc_function_t *function = (uc_function_t *)fno; uc_closure_t *closure = (uc_closure_t *)ucv_closure_new(vm, function, insn == I_ARFN); volatile int32_t uv; @@ -1163,7 +1158,7 @@ uc_vm_insn_store_var(uc_vm_t *vm, uc_vm_insn_t insn) bool found; scope = vm->globals; - name = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + name = uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32); while (ucv_type(name) == UC_STRING) { ucv_object_get(scope, ucv_string_get(name), &found); @@ -1562,7 +1557,7 @@ uc_vm_insn_update_var(uc_vm_t *vm, uc_vm_insn_t insn) bool found; scope = vm->globals; - name = 
uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32 & 0x00FFFFFF); + name = uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32 & 0x00FFFFFF); assert(ucv_type(name) == UC_STRING); -- cgit v1.2.3 From 6c2caf9fbb9d346cfb20cd5c83875fdff77e584c Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Thu, 13 Jan 2022 16:06:17 +0100 Subject: source: refactor source file handling - Move source object pointer into program entity which is referenced by each function - Move lineinfo related routines into source.c and use them from lexer.c since lineinfo encoding does not belong into the lexical analyzer. - Implement initial infrastructure for detecting source file type, this is required later to differentiate between plaintext and precompiled bytecode files Signed-off-by: Jo-Philipp Wich --- compiler.c | 59 ++++++++++++++++++------------- include/ucode/program.h | 4 +-- include/ucode/source.h | 9 +++++ include/ucode/types.h | 4 +-- lexer.c | 93 ++----------------------------------------------- lib.c | 4 +-- program.c | 10 ++++-- source.c | 82 +++++++++++++++++++++++++++++++++++++++++++ types.c | 4 +-- vm.c | 24 +++++++++---- 10 files changed, 160 insertions(+), 133 deletions(-) (limited to 'program.c') diff --git a/compiler.c b/compiler.c index 97962b1..a4c7381 100644 --- a/compiler.c +++ b/compiler.c @@ -114,7 +114,7 @@ uc_compiler_exprstack_is(uc_compiler_t *compiler, uc_exprflag_t flag) } static void -uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_source_t *source, uc_program_t *program, bool strict) +uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_program_t *program, bool strict) { uc_value_t *varname = ucv_string_new("(callee)"); uc_function_t *fn; @@ -122,7 +122,7 @@ uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_so compiler->scope_depth = 0; compiler->program = program; - compiler->function = uc_program_function_new(program, name, srcpos, source); + 
compiler->function = uc_program_function_new(program, name, srcpos); compiler->locals.count = 0; compiler->locals.entries = NULL; @@ -156,9 +156,7 @@ uc_compiler_current_chunk(uc_compiler_t *compiler) static uc_source_t * uc_compiler_current_source(uc_compiler_t *compiler) { - uc_function_t *fn = (uc_function_t *)compiler->function; - - return fn->source; + return compiler->program->source; } __attribute__((format(printf, 3, 0))) static void @@ -1132,7 +1130,7 @@ uc_compiler_compile_arrowfn(uc_compiler_t *compiler, uc_value_t *args, bool rest pos = compiler->parser->prev.pos; uc_compiler_init(&fncompiler, NULL, compiler->parser->prev.pos, - uc_compiler_current_source(compiler), compiler->program, + compiler->program, uc_compiler_is_strict(compiler)); fncompiler.parent = compiler; @@ -1562,7 +1560,7 @@ uc_compiler_compile_function(uc_compiler_t *compiler) uc_compiler_init(&fncompiler, name ? ucv_string_get(name) : NULL, compiler->parser->prev.pos, - uc_compiler_current_source(compiler), compiler->program, + compiler->program, uc_compiler_is_strict(compiler)); fncompiler.parent = compiler; @@ -2873,29 +2871,42 @@ uc_compile(uc_parse_config_t *config, uc_source_t *source, char **errp) uc_exprstack_t expr = { .token = TK_EOF }; uc_parser_t parser = { .config = config }; uc_compiler_t compiler = { .parser = &parser, .exprstack = &expr }; - uc_program_t *prog = uc_program_new(); - uc_function_t *fn; + uc_function_t *fn = NULL; + uc_program_t *prog; - uc_lexer_init(&parser.lex, config, source); - uc_compiler_init(&compiler, "main", 0, source, prog, - config && config->strict_declarations); + switch (uc_source_type_test(source)) { + case UC_SOURCE_TYPE_PLAIN: + prog = uc_program_new(source); - uc_compiler_parse_advance(&compiler); + uc_lexer_init(&parser.lex, config, source); + uc_compiler_init(&compiler, "main", 0, prog, + config && config->strict_declarations); - while (!uc_compiler_parse_match(&compiler, TK_EOF)) - uc_compiler_compile_declaration(&compiler); + 
uc_compiler_parse_advance(&compiler); - fn = uc_compiler_finish(&compiler); + while (!uc_compiler_parse_match(&compiler, TK_EOF)) + uc_compiler_compile_declaration(&compiler); - if (errp) { - *errp = parser.error ? parser.error->buf : NULL; - free(parser.error); - } - else { - printbuf_free(parser.error); - } + fn = uc_compiler_finish(&compiler); - uc_lexer_free(&parser.lex); + if (errp) { + *errp = parser.error ? parser.error->buf : NULL; + free(parser.error); + } + else { + printbuf_free(parser.error); + } + + uc_lexer_free(&parser.lex); + + break; + + default: + if (errp) + xasprintf(errp, "Unrecognized source type\n"); + + break; + } return fn; } diff --git a/include/ucode/program.h b/include/ucode/program.h index 9bbc67e..0f56f99 100644 --- a/include/ucode/program.h +++ b/include/ucode/program.h @@ -20,11 +20,11 @@ #include "types.h" -uc_program_t *uc_program_new(void); +uc_program_t *uc_program_new(uc_source_t *); void uc_program_free(uc_program_t *); -uc_value_t *uc_program_function_new(uc_program_t *, const char *, size_t, uc_source_t *); +uc_value_t *uc_program_function_new(uc_program_t *, const char *, size_t); size_t uc_program_function_id(uc_program_t *, uc_value_t *); uc_value_t *uc_program_function_load(uc_program_t *, size_t); diff --git a/include/ucode/source.h b/include/ucode/source.h index 3de7c93..e7a5667 100644 --- a/include/ucode/source.h +++ b/include/ucode/source.h @@ -25,6 +25,10 @@ #include "types.h" +typedef enum { + UC_SOURCE_TYPE_PLAIN = 0, +} uc_source_type_t; + uc_source_t *uc_source_new_file(const char *path); uc_source_t *uc_source_new_buffer(const char *name, char *buf, size_t len); @@ -33,4 +37,9 @@ size_t uc_source_get_line(uc_source_t *source, size_t *offset); uc_source_t *uc_source_get(uc_source_t *source); void uc_source_put(uc_source_t *source); +uc_source_type_t uc_source_type_test(uc_source_t *source); + +void uc_source_line_next(uc_source_t *source); +void uc_source_line_update(uc_source_t *source, size_t off); + #endif /* 
__SOURCE_H_ */ diff --git a/include/ucode/types.h b/include/ucode/types.h index be10ac5..66db5ea 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -154,7 +154,6 @@ typedef struct uc_function { size_t nupvals; size_t srcpos; uc_chunk_t chunk; - uc_source_t *source; struct uc_program *program; uc_weakref_t progref; char name[]; @@ -205,6 +204,7 @@ uc_declare_vector(uc_resource_types_t, uc_resource_type_t *); typedef struct uc_program { uc_value_list_t constants; uc_weakref_t functions; + uc_source_t *source; } uc_program_t; @@ -350,7 +350,7 @@ size_t ucv_object_length(uc_value_t *); : 0); \ entry##key = entry_next##key) -uc_value_t *ucv_function_new(const char *, size_t, uc_source_t *, uc_program_t *); +uc_value_t *ucv_function_new(const char *, size_t, uc_program_t *); size_t ucv_function_srcpos(uc_value_t *, size_t); uc_value_t *ucv_cfunction_new(const char *, uc_cfn_ptr_t); diff --git a/lexer.c b/lexer.c index 7637306..d554c0d 100644 --- a/lexer.c +++ b/lexer.c @@ -278,65 +278,14 @@ _buf_startswith(uc_lexer_t *lex, const char *str, size_t len) { #define buf_startswith(s, str) _buf_startswith(s, str, sizeof(str) - 1) -/* lineinfo is encoded in bytes: the most significant bit specifies whether - * to advance the line count by one or not, while the remaining 7 bits encode - * the amounts of bytes on the current line. - * - * If a line has more than 127 characters, the first byte will be set to - * 0xff (1 1111111) and subsequent bytes will encode the remaining characters - * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus - * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:1111111). - * - * The newline character itself is not counted, so an empty line is encoded as - * 0x80 (1:0000000). 
- */ - -static void -next_lineinfo(uc_lexer_t *lex) -{ - uc_lineinfo_t *lines = &lex->source->lineinfo; - - uc_vector_grow(lines); - lines->entries[lines->count++] = 0x80; -} - -static void -update_lineinfo(uc_lexer_t *lex, size_t off) -{ - uc_lineinfo_t *lines = &lex->source->lineinfo; - uint8_t *entry, n; - - entry = uc_vector_last(lines); - - if ((entry[0] & 0x7f) + off <= 0x7f) { - entry[0] += off; - } - else { - off -= (0x7f - (entry[0] & 0x7f)); - entry[0] |= 0x7f; - - while (off > 0) { - n = (off > 0x7f) ? 0x7f : off; - uc_vector_grow(lines); - entry = uc_vector_last(lines); - entry[1] = n; - off -= n; - lines->count++; - } - } -} - static void buf_consume(uc_lexer_t *lex, size_t len) { size_t i, linelen; - if (!lex->source->lineinfo.count) - next_lineinfo(lex); - for (i = 0, linelen = 0; i < len; i++) { if (lex->bufstart[i] == '\n') { - update_lineinfo(lex, linelen); - next_lineinfo(lex); + uc_source_line_update(lex->source, linelen); + uc_source_line_next(lex->source); linelen = 0; } @@ -346,7 +295,7 @@ buf_consume(uc_lexer_t *lex, size_t len) { } if (linelen) - update_lineinfo(lex, linelen); + uc_source_line_update(lex->source, linelen); lex->bufstart += len; lex->source->off += len; @@ -1120,38 +1069,6 @@ lex_step(uc_lexer_t *lex, FILE *fp) return NULL; } -static void -uc_lexer_skip_shebang(uc_lexer_t *lex) -{ - uc_source_t *source = lex->source; - FILE *fp = source->fp; - int c1, c2; - - c1 = fgetc(fp); - c2 = fgetc(fp); - - if (c1 == '#' && c2 == '!') { - next_lineinfo(lex); - - source->off += 2; - - while ((c1 = fgetc(fp)) != EOF) { - source->off++; - - if (c1 == '\n') { - update_lineinfo(lex, source->off); - next_lineinfo(lex); - - break; - } - } - } - else { - ungetc(c2, fp); - ungetc(c1, fp); - } -} - void uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source) { @@ -1187,10 +1104,6 @@ uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source) lex->state = UC_LEX_IDENTIFY_TOKEN; lex->block = STATEMENTS; } 
- - /* Skip any potential interpreter line */ - if (lex->source->off == 0) - uc_lexer_skip_shebang(lex); } void diff --git a/lib.c b/lib.c index 7ded088..0e77b23 100644 --- a/lib.c +++ b/lib.c @@ -2093,7 +2093,7 @@ uc_include(uc_vm_t *vm, size_t nargs) if (!closure) return NULL; - p = include_path(closure->function->source->filename, ucv_string_get(path)); + p = include_path(closure->function->program->source->filename, ucv_string_get(path)); if (!p) { uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, @@ -2505,7 +2505,7 @@ uc_sourcepath(uc_vm_t *vm, size_t nargs) continue; } - path = realpath(frame->closure->function->source->filename, NULL); + path = realpath(frame->closure->function->program->source->filename, NULL); break; } diff --git a/program.c b/program.c index 5d3a104..f8190e4 100644 --- a/program.c +++ b/program.c @@ -15,11 +15,12 @@ */ #include "ucode/program.h" +#include "ucode/source.h" #include "ucode/vallist.h" uc_program_t * -uc_program_new(void) +uc_program_new(uc_source_t *source) { uc_program_t *prog; @@ -28,6 +29,8 @@ uc_program_new(void) prog->functions.next = &prog->functions; prog->functions.prev = &prog->functions; + prog->source = uc_source_get(source); + uc_vallist_init(&prog->constants); return prog; @@ -64,15 +67,16 @@ uc_program_free(uc_program_t *prog) } uc_vallist_free(&prog->constants); + uc_source_put(prog->source); free(prog); } uc_value_t * -uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos, uc_source_t *source) +uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos) { uc_function_t *func; - func = (uc_function_t *)ucv_function_new(name, srcpos, source, prog); + func = (uc_function_t *)ucv_function_new(name, srcpos, prog); func->root = (prog->functions.next == &prog->functions); ucv_ref(&prog->functions, &func->progref); diff --git a/source.c b/source.c index b7bb96d..aa73efd 100644 --- a/source.c +++ b/source.c @@ -15,6 +15,7 @@ */ #include <string.h> +#include <errno.h> #include "ucode/source.h" @@ -116,3 
+117,84 @@ uc_source_put(uc_source_t *source) free(source->buffer); free(source); } + +uc_source_type_t +uc_source_type_test(uc_source_t *source) +{ + union { char s[sizeof(uint32_t)]; uint32_t n; } buf; + uc_source_type_t type = UC_SOURCE_TYPE_PLAIN; + FILE *fp = source->fp; + int c; + + if (fread(buf.s, 1, 2, fp) == 2 && !strncmp(buf.s, "#!", 2)) { + source->off += 2; + + while ((c = fgetc(fp)) != EOF) { + source->off++; + + if (c == '\n') { + uc_source_line_update(source, source->off); + uc_source_line_next(source); + + break; + } + } + } + else { + if (fseek(fp, 0L, SEEK_SET) == -1) + fprintf(stderr, "Failed to rewind source buffer: %s\n", strerror(errno)); + } + + return type; +} + +/* lineinfo is encoded in bytes: the most significant bit specifies whether + * to advance the line count by one or not, while the remaining 7 bits encode + * the number of bytes on the current line. + * + * If a line has more than 127 characters, the first byte will be set to + * 0xff (1 1111111) and subsequent bytes will encode the remaining characters + * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus + * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:0010011). + * + * The newline character itself is not counted, so an empty line is encoded as + * 0x80 (1:0000000). + */ + +void +uc_source_line_next(uc_source_t *source) +{ + uc_lineinfo_t *lines = &source->lineinfo; + + uc_vector_grow(lines); + lines->entries[lines->count++] = 0x80; +} + +void +uc_source_line_update(uc_source_t *source, size_t off) +{ + uc_lineinfo_t *lines = &source->lineinfo; + uint8_t *entry, n; + + if (!lines->count) + uc_source_line_next(source); + + entry = uc_vector_last(lines); + + if ((entry[0] & 0x7f) + off <= 0x7f) { + entry[0] += off; + } + else { + off -= (0x7f - (entry[0] & 0x7f)); + entry[0] |= 0x7f; + + while (off > 0) { + n = (off > 0x7f) ? 
0x7f : off; + uc_vector_grow(lines); + entry = uc_vector_last(lines); + entry[1] = n; + off -= n; + lines->count++; + } + } +} diff --git a/types.c b/types.c index 65cba22..68eba42 100644 --- a/types.c +++ b/types.c @@ -248,7 +248,6 @@ ucv_free(uc_value_t *uv, bool retain) } uc_chunk_free(&function->chunk); - uc_source_put(function->source); break; case UC_CLOSURE: @@ -951,7 +950,7 @@ ucv_object_length(uc_value_t *uv) uc_value_t * -ucv_function_new(const char *name, size_t srcpos, uc_source_t *source, uc_program_t *program) +ucv_function_new(const char *name, size_t srcpos, uc_program_t *program) { size_t namelen = 0; uc_function_t *fn; @@ -969,7 +968,6 @@ ucv_function_new(const char *name, size_t srcpos, uc_source_t *source, uc_progra fn->nargs = 0; fn->nupvals = 0; fn->srcpos = srcpos; - fn->source = uc_source_get(source); fn->program = program; fn->vararg = false; diff --git a/vm.c b/vm.c index 1766dc6..f6886ba 100644 --- a/vm.c +++ b/vm.c @@ -208,6 +208,12 @@ uc_vm_frame_program(uc_callframe_t *frame) return frame->closure ? frame->closure->function->program : NULL; } +static uc_source_t * +uc_vm_frame_source(uc_callframe_t *frame) +{ + return frame->closure ? 
frame->closure->function->program->source : NULL; +} + static uc_callframe_t * uc_vm_current_frame(uc_vm_t *vm) { @@ -586,18 +592,20 @@ uc_dump_insn(uc_vm_t *vm, uint8_t *pos, uc_vm_insn_t insn) uc_chunk_t *chunk = uc_vm_frame_chunk(frame); uc_stringbuf_t *buf = NULL; uc_value_t *cnst = NULL; + uc_source_t *source; size_t srcpos; srcpos = ucv_function_srcpos(&frame->closure->function->header, pos - chunk->entries); + source = uc_vm_frame_source(frame); - if (last_srcpos == 0 || last_source != frame->closure->function->source || srcpos != last_srcpos) { + if (last_srcpos == 0 || last_source != source || srcpos != last_srcpos) { buf = xprintbuf_new(); - uc_source_context_format(buf, frame->closure->function->source, srcpos, true); + uc_source_context_format(buf, source, srcpos, true); fwrite(buf->buf, 1, printbuf_length(buf), stderr); printbuf_free(buf); - last_source = frame->closure->function->source; + last_source = source; last_srcpos = srcpos; } @@ -802,6 +810,7 @@ uc_vm_capture_stacktrace(uc_vm_t *vm, size_t i) uc_value_t *stacktrace, *entry, *last = NULL; uc_function_t *function; uc_callframe_t *frame; + uc_source_t *source; size_t off, srcpos; char *name; @@ -813,12 +822,13 @@ uc_vm_capture_stacktrace(uc_vm_t *vm, size_t i) if (frame->closure) { function = frame->closure->function; + source = function->program->source; off = (frame->ip - uc_vm_frame_chunk(frame)->entries) - 1; srcpos = ucv_function_srcpos(&function->header, off); - ucv_object_add(entry, "filename", ucv_string_new(function->source->filename)); - ucv_object_add(entry, "line", ucv_int64_new(uc_source_get_line(function->source, &srcpos))); + ucv_object_add(entry, "filename", ucv_string_new(source->filename)); + ucv_object_add(entry, "line", ucv_int64_new(uc_source_get_line(source, &srcpos))); ucv_object_add(entry, "byte", ucv_int64_new(srcpos)); } @@ -876,7 +886,7 @@ uc_vm_get_error_context(uc_vm_t *vm) buf = ucv_stringbuf_new(); if (offset) - uc_error_context_format(buf, 
frame->closure->function->source, stacktrace, offset); + uc_error_context_format(buf, uc_vm_frame_source(frame), stacktrace, offset); else if (frame->ip != chunk->entries) ucv_stringbuf_printf(buf, "At instruction %zu", (frame->ip - chunk->entries) - 1); else @@ -2560,7 +2570,7 @@ uc_vm_execute(uc_vm_t *vm, uc_function_t *fn, uc_value_t **retval) if (vm->trace) { buf = xprintbuf_new(); - uc_source_context_format(buf, fn->source, 0, true); + uc_source_context_format(buf, uc_vm_frame_source(frame), 0, true); fwrite(buf->buf, 1, printbuf_length(buf), stderr); printbuf_free(buf); -- cgit v1.2.3 From 371ba457917cf319b74de5a56e17782f6c4cd77a Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Sat, 15 Jan 2022 21:22:37 +0100 Subject: program: implement support for precompiling source files - Introduce new command line flags `-o` and `-O` to write compiled program code into the specified output file - Add support for transparently executing precompiled files, the lexical analyzing and compilation phase is skipped in this case Signed-off-by: Jo-Philipp Wich --- compiler.c | 16 ++ include/ucode/program.h | 5 + include/ucode/source.h | 3 + include/ucode/util.h | 8 +- main.c | 40 ++- program.c | 668 ++++++++++++++++++++++++++++++++++++++++++++++++ source.c | 25 +- tests/cram/test_basic.t | 2 + 8 files changed, 755 insertions(+), 12 deletions(-) (limited to 'program.c') diff --git a/compiler.c b/compiler.c index a4c7381..119e7f7 100644 --- a/compiler.c +++ b/compiler.c @@ -2901,6 +2901,22 @@ uc_compile(uc_parse_config_t *config, uc_source_t *source, char **errp) break; + case UC_SOURCE_TYPE_PRECOMPILED: + prog = uc_program_from_file(source->fp, errp); + + if (prog) { + fn = uc_program_entry(prog); + + if (!fn) { + if (errp) + xasprintf(errp, "Program file contains no entry function\n"); + + uc_program_free(prog); + } + } + + break; + default: if (errp) xasprintf(errp, "Unrecognized source type\n"); diff --git a/include/ucode/program.h b/include/ucode/program.h index 
0f56f99..9a5c553 100644 --- a/include/ucode/program.h +++ b/include/ucode/program.h @@ -31,4 +31,9 @@ uc_value_t *uc_program_function_load(uc_program_t *, size_t); uc_value_t *uc_program_get_constant(uc_program_t *, size_t); ssize_t uc_program_add_constant(uc_program_t *, uc_value_t *); +void uc_program_to_file(uc_program_t *, FILE *, bool); +uc_program_t *uc_program_from_file(FILE *file, char **); + +uc_function_t *uc_program_entry(uc_program_t *); + #endif /* __PROGRAM_H_ */ diff --git a/include/ucode/source.h b/include/ucode/source.h index e7a5667..ac0b487 100644 --- a/include/ucode/source.h +++ b/include/ucode/source.h @@ -25,8 +25,11 @@ #include "types.h" +#define UC_PRECOMPILED_BYTECODE_MAGIC 0x1b756362 /* 'u' 'c' 'b' */ + typedef enum { UC_SOURCE_TYPE_PLAIN = 0, + UC_SOURCE_TYPE_PRECOMPILED = 1, } uc_source_type_t; uc_source_t *uc_source_new_file(const char *path); diff --git a/include/ucode/util.h b/include/ucode/util.h index 858a3fd..1ad13bd 100644 --- a/include/ucode/util.h +++ b/include/ucode/util.h @@ -71,8 +71,8 @@ /* "failsafe" utility functions */ -static inline void *xalloc(size_t size) { - void *ptr = calloc(1, size); +static inline void *xcalloc(size_t size, size_t nmemb) { + void *ptr = calloc(size, nmemb); if (!ptr) { fprintf(stderr, "Out of memory\n"); @@ -82,6 +82,10 @@ static inline void *xalloc(size_t size) { return ptr; } +static inline void *xalloc(size_t size) { + return xcalloc(1, size); +} + static inline void *xrealloc(void *ptr, size_t size) { ptr = realloc(ptr, size); diff --git a/main.c b/main.c index 834976d..aba9989 100644 --- a/main.c +++ b/main.c @@ -33,6 +33,7 @@ #include "ucode/lib.h" #include "ucode/vm.h" #include "ucode/source.h" +#include "ucode/program.h" static void @@ -52,7 +53,9 @@ print_usage(const char *app) " -e Set global variables from given JSON object\n" " -E Set global variables from given JSON file\n" " -x Disable given function\n" - " -m Preload given module\n", + " -m Preload given module\n" + " -o Write 
precompiled byte code to given file\n" + " -O Write precompiled byte code to given file and strip debug information\n", basename(app)); } @@ -75,7 +78,7 @@ register_variable(uc_value_t *scope, const char *key, uc_value_t *val) static int -compile(uc_vm_t *vm, uc_source_t *src) +compile(uc_vm_t *vm, uc_source_t *src, FILE *precompile, bool strip) { uc_value_t *res = NULL; uc_function_t *entry; @@ -91,6 +94,13 @@ compile(uc_vm_t *vm, uc_source_t *src) goto out; } + if (precompile) { + uc_program_to_file(entry->program, precompile, !strip); + uc_program_free(entry->program); + fclose(precompile); + goto out; + } + rc = uc_vm_execute(vm, entry, &res); switch (rc) { @@ -188,7 +198,9 @@ int main(int argc, char **argv) { uc_source_t *source = NULL, *envfile = NULL; + FILE *precompile = NULL; char *stdin = NULL, *c; + bool strip = false; uc_vm_t vm = { 0 }; uc_value_t *o, *p; int opt, rv = 0; @@ -219,7 +231,7 @@ main(int argc, char **argv) ucv_object_add(uc_vm_scope_get(&vm), "ARGV", o); /* parse options */ - while ((opt = getopt(argc, argv, "hlrtSRe:E:i:s:m:x:")) != -1) + while ((opt = getopt(argc, argv, "hlrtSRe:E:i:s:m:x:o:O:")) != -1) { switch (opt) { case 'h': @@ -354,6 +366,26 @@ main(int argc, char **argv) fprintf(stderr, "Unknown function %s specified\n", optarg); break; + + case 'o': + case 'O': + strip = (opt == 'O'); + + if (!strcmp(optarg, "-")) { + precompile = stdout; + } + else { + precompile = fopen(optarg, "wb"); + + if (!precompile) { + fprintf(stderr, "Unable to open output file %s: %s\n", + optarg, strerror(errno)); + + goto out; + } + } + + break; } } @@ -373,7 +405,7 @@ main(int argc, char **argv) goto out; } - rv = compile(&vm, source); + rv = compile(&vm, source, precompile, strip); out: uc_source_put(source); diff --git a/program.c b/program.c index f8190e4..f3b90b4 100644 --- a/program.c +++ b/program.c @@ -14,6 +14,10 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +#include +#include +#include + #include "ucode/program.h" #include "ucode/source.h" #include "ucode/vallist.h" @@ -121,3 +125,667 @@ uc_program_add_constant(uc_program_t *prog, uc_value_t *val) { return uc_vallist_add(&prog->constants, val); } + +static void +write_u16(size_t value, FILE *file) +{ + uint16_t n; + + if (sizeof(value) > sizeof(n)) + assert(value <= UINT16_MAX); + + n = htobe16((uint16_t)value); + + fwrite(&n, 1, sizeof(n), file); +} + +static void +write_u32(size_t value, FILE *file) +{ + uint32_t n; + + if (sizeof(value) > sizeof(n)) + assert(value <= UINT32_MAX); + + n = htobe32((uint32_t)value); + + fwrite(&n, 1, sizeof(n), file); +} + +static void +write_u64(uint64_t value, FILE *file) +{ + uint64_t n; + + if (sizeof(value) > sizeof(n)) + assert(value <= UINT64_MAX); + + n = htobe64((uint64_t)value); + + fwrite(&n, 1, sizeof(n), file); +} + +static void +_write_vector(size_t count, size_t itemsize, void *data, FILE *file) +{ + size_t pad = (~(count * itemsize) + 1) & (sizeof(uint32_t) - 1); + char z[sizeof(uint32_t) - 1] = { 0 }; + + write_u32(count, file); + fwrite(data, itemsize, count, file); + fwrite(z, 1, pad, file); +} + +#define write_vector(vec, file) \ + _write_vector((vec)->count, sizeof((vec)->entries[0]), (vec)->entries, file) + +#define write_string(str, file) \ + _write_vector(strlen(str) + 1, 1, str, file) + +static void +write_vallist(uc_value_list_t *vallist, FILE *file) +{ + size_t i; + + /* write index */ + write_u32(vallist->isize, file); + + for (i = 0; i < vallist->isize; i++) + write_u64(vallist->index[i], file); + + /* write data */ + write_u32(vallist->dsize, file); + fwrite(vallist->data, 1, vallist->dsize, file); +} + +enum { + UC_PROGRAM_F_DEBUG = (1 << 0), + UC_PROGRAM_F_SOURCEINFO = (1 << 1), + UC_PROGRAM_F_SOURCEBUF = (1 << 2), +}; + +enum { + UC_FUNCTION_F_IS_ARROW = (1 << 0), + UC_FUNCTION_F_IS_VARARG = (1 << 1), + UC_FUNCTION_F_IS_STRICT = (1 << 2), + UC_FUNCTION_F_HAS_EXCEPTIONS = (1 << 3), + 
UC_FUNCTION_F_HAS_NAME = (1 << 4), + UC_FUNCTION_F_HAS_VARDBG = (1 << 5), + UC_FUNCTION_F_HAS_OFFSETDBG = (1 << 6), +}; + +static void +write_chunk(uc_chunk_t *chunk, FILE *file, uint32_t flags) +{ + size_t i; + + /* write bytecode data */ + write_vector(chunk, file); + + /* write exception ranges */ + if (flags & UC_FUNCTION_F_HAS_EXCEPTIONS) { + write_u32(chunk->ehranges.count, file); + + for (i = 0; i < chunk->ehranges.count; i++) { + write_u32(chunk->ehranges.entries[i].from, file); + write_u32(chunk->ehranges.entries[i].to, file); + write_u32(chunk->ehranges.entries[i].target, file); + write_u32(chunk->ehranges.entries[i].slot, file); + } + } + + /* write variable info */ + if (flags & UC_FUNCTION_F_HAS_VARDBG) { + write_u32(chunk->debuginfo.variables.count, file); + + for (i = 0; i < chunk->debuginfo.variables.count; i++) { + write_u32(chunk->debuginfo.variables.entries[i].from, file); + write_u32(chunk->debuginfo.variables.entries[i].to, file); + write_u32(chunk->debuginfo.variables.entries[i].slot, file); + write_u32(chunk->debuginfo.variables.entries[i].nameidx, file); + } + + write_vallist(&chunk->debuginfo.varnames, file); + } + + /* write offset info */ + if (flags & UC_FUNCTION_F_HAS_OFFSETDBG) + write_vector(&chunk->debuginfo.offsets, file); +} + +static void +write_function(uc_function_t *func, FILE *file, bool debug) +{ + uint32_t flags = 0; + + if (func->arrow) + flags |= UC_FUNCTION_F_IS_ARROW; + + if (func->vararg) + flags |= UC_FUNCTION_F_IS_VARARG; + + if (func->strict) + flags |= UC_FUNCTION_F_IS_STRICT; + + if (func->chunk.ehranges.count) + flags |= UC_FUNCTION_F_HAS_EXCEPTIONS; + + if (debug && func->name[0]) + flags |= UC_FUNCTION_F_HAS_NAME; + + if (debug && func->chunk.debuginfo.variables.count) + flags |= UC_FUNCTION_F_HAS_VARDBG; + + if (debug && func->chunk.debuginfo.offsets.count) + flags |= UC_FUNCTION_F_HAS_OFFSETDBG; + + write_u32(flags, file); + + if (flags & UC_FUNCTION_F_HAS_NAME) + write_string(func->name, file); + + 
write_u16(func->nargs, file); + write_u16(func->nupvals, file); + write_u32(func->srcpos, file); + + write_chunk(&func->chunk, file, flags); +} + +void +uc_program_to_file(uc_program_t *prog, FILE *file, bool debug) +{ + uint32_t flags = 0; + uc_weakref_t *ref; + size_t i; + + if (debug) + flags |= UC_PROGRAM_F_DEBUG; + + if (debug && prog->source) { + flags |= UC_PROGRAM_F_SOURCEINFO; + + if (prog->source->buffer) + flags |= UC_PROGRAM_F_SOURCEBUF; + } + + /* magic word + flags */ + write_u32(UC_PRECOMPILED_BYTECODE_MAGIC, file); + write_u32(flags, file); + + if (flags & UC_PROGRAM_F_SOURCEINFO) { + /* write source file name */ + write_string(prog->source->filename, file); + + /* include source buffer if program was compiled from stdin */ + if (flags & UC_PROGRAM_F_SOURCEBUF) + write_string(prog->source->buffer, file); + + /* write lineinfo data */ + write_vector(&prog->source->lineinfo, file); + } + + /* write constants */ + write_vallist(&prog->constants, file); + + /* write program sections */ + for (i = 0, ref = prog->functions.prev; ref != &prog->functions; ref = ref->prev) + i++; + + write_u32(i, file); + + for (ref = prog->functions.prev; ref != &prog->functions; ref = ref->prev) + write_function(ref_to_function(ref), file, debug); +} + +static bool +read_error(FILE *file, char **errp, const char *subject, size_t rlen, size_t len) +{ + const char *reason; + + if (feof(file)) + reason = "Premature EOF"; + else + reason = strerror(errno); + + if (errp) + xasprintf(errp, + "%s while reading %s at offset %ld, got %zu of %zu bytes\n", + reason, subject, ftell(file) - rlen, rlen, len); + + return false; +} + +static bool +skip_padding(FILE *file, size_t len, char **errp) +{ + size_t pad = (~len + 1) & (sizeof(uint32_t) - 1), rlen; + char buf[sizeof(uint32_t) - 1]; + + if (pad != 0) { + rlen = fread(buf, 1, pad, file); + + if (rlen != pad) + return read_error(file, errp, "padding", rlen, pad); + } + + return true; +} + +static bool +read_u32(FILE *file, uint32_t 
*n, const char *subj, char **errp) +{ + size_t rlen = fread(n, 1, sizeof(*n), file); + + if (rlen != sizeof(*n)) { + *n = 0; + + return read_error(file, errp, subj ? subj : "uint32_t", rlen, sizeof(*n)); + } + + *n = be32toh(*n); + + return true; +} + +static bool +read_u64(FILE *file, uint64_t *n, const char *subj, char **errp) +{ + size_t rlen = fread(n, 1, sizeof(*n), file); + + if (rlen != sizeof(*n)) { + *n = 0; + + return read_error(file, errp, subj ? subj : "uint64_t", rlen, sizeof(*n)); + } + + *n = be64toh(*n); + + return true; +} + +static bool +read_size_t(FILE *file, size_t *n, size_t size, const char *subj, char **errp) +{ + union { uint8_t u8; uint16_t u16; uint32_t u32; uint64_t u64; } nval; + size_t rlen; + + rlen = fread(&nval.u64, 1, size, file); + + if (rlen != size) { + *n = 0; + + if (!subj) { + switch (size) { + case 1: subj = "uint8_t"; break; + case 2: subj = "uint16_t"; break; + case 4: subj = "uint32_t"; break; + case 8: subj = "uint64_t"; break; + } + } + + return read_error(file, errp, subj, rlen, sizeof(nval)); + } + + switch (size) { + case 1: *n = (size_t) nval.u8; break; + case 2: *n = (size_t)be16toh(nval.u16); break; + case 4: *n = (size_t)be32toh(nval.u32); break; + case 8: *n = (size_t)be64toh(nval.u64); break; + } + + return true; +} + +static bool +_read_vector(FILE *file, void *ptr, size_t itemsize, const char *subj, char **errp) +{ + struct { size_t count; void *data; } *vec = ptr; + size_t rlen, len; + char subjbuf[64]; + + snprintf(subjbuf, sizeof(subjbuf), "%s vector size", subj); + + if (!read_size_t(file, &vec->count, sizeof(uint32_t), subjbuf, errp)) + return false; + + vec->data = xcalloc(vec->count, itemsize); + + len = vec->count; + rlen = fread(vec->data, itemsize, len, file); + + if (rlen != len) { + free(vec->data); + + vec->count = 0; + vec->data = NULL; + + snprintf(subjbuf, sizeof(subjbuf), "%s vector data", subj); + + return read_error(file, errp, subjbuf, rlen * itemsize, len * itemsize); + } + + return 
skip_padding(file, vec->count * itemsize, errp); +} + +#define read_vector(file, vec, subj, errp) \ + _read_vector(file, vec, sizeof((vec)->entries[0]), subj, errp) + +static bool +read_string(FILE *file, char *dst, size_t len, const char *subj, char **errp) +{ + size_t rlen; + + rlen = fread(dst, 1, len, file); + + if (rlen != len) + return read_error(file, errp, subj, rlen, len); + + return skip_padding(file, len, errp); +} + +static bool +read_vallist(FILE *file, uc_value_list_t *vallist, const char *subj, char **errp) +{ + char subjbuf[64]; + size_t i; + + /* read index */ + snprintf(subjbuf, sizeof(subjbuf), "%s index size", subj); + + if (!read_size_t(file, &vallist->isize, sizeof(uint32_t), subjbuf, errp)) + goto out; + + vallist->index = xcalloc(sizeof(vallist->index[0]), vallist->isize); + + for (i = 0; i < vallist->isize; i++) { + snprintf(subjbuf, sizeof(subjbuf), "%s index entry %zu of %zu", subj, i, vallist->isize); + + if (!read_u64(file, &vallist->index[i], subjbuf, errp)) + goto out; + } + + /* read data */ + snprintf(subjbuf, sizeof(subjbuf), "%s data size", subj); + + if (!read_size_t(file, &vallist->dsize, sizeof(uint32_t), subjbuf, errp)) + goto out; + + vallist->data = xalloc(vallist->dsize); + + snprintf(subjbuf, sizeof(subjbuf), "%s data", subj); + + if (!read_string(file, vallist->data, vallist->dsize, subj, errp)) + goto out; + + return true; + +out: + free(vallist->index); + free(vallist->data); + + vallist->isize = 0; + vallist->index = NULL; + + vallist->dsize = 0; + vallist->data = NULL; + + return false; +} + +static uc_source_t * +read_sourceinfo(FILE *file, uint32_t flags, char **errp) +{ + char *path = NULL, *code = NULL; + uc_source_t *source = NULL; + size_t len; + + if (flags & UC_PROGRAM_F_SOURCEINFO) { + if (!read_size_t(file, &len, sizeof(uint32_t), "sourceinfo filename length", errp)) + goto out; + + path = xalloc(len); + + if (!read_string(file, path, len, "sourceinfo filename", errp)) + goto out; + + if (flags & 
UC_PROGRAM_F_SOURCEBUF) { + if (!read_size_t(file, &len, sizeof(uint32_t), "sourceinfo code buffer length", errp)) + goto out; + + code = xalloc(len); + + if (!read_string(file, code, len, "sourceinfo code buffer data", errp)) + goto out; + + source = uc_source_new_buffer(path, code, len); + } + else { + source = uc_source_new_file(path); + + if (!source) { + fprintf(stderr, "Unable to open source file %s: %s\n", path, strerror(errno)); + source = uc_source_new_buffer(path, "", 0); + } + } + + if (!read_vector(file, &source->lineinfo, "sourceinfo lineinfo", errp)) { + uc_source_put(source); + source = NULL; + goto out; + } + } + else { + source = uc_source_new_buffer("[no source]", xstrdup(""), 0); + } + +out: + free(path); + free(code); + + return source; +} + +static bool +read_chunk(FILE *file, uc_chunk_t *chunk, uint32_t flags, const char *subj, char **errp) +{ + uc_varrange_t *varrange; + uc_ehrange_t *ehrange; + char subjbuf[192]; + size_t i; + + /* read bytecode data */ + snprintf(subjbuf, sizeof(subjbuf), "%s byte code", subj); + + if (!read_vector(file, chunk, subjbuf, errp)) + goto out; + + /* read exception ranges */ + if (flags & UC_FUNCTION_F_HAS_EXCEPTIONS) { + snprintf(subjbuf, sizeof(subjbuf), "%s exception ranges count", subj); + + if (!read_size_t(file, &chunk->ehranges.count, sizeof(uint32_t), subjbuf, errp)) + goto out; + + chunk->ehranges.entries = xcalloc( + sizeof(chunk->ehranges.entries[0]), + chunk->ehranges.count); + + for (i = 0; i < chunk->ehranges.count; i++) { + snprintf(subjbuf, sizeof(subjbuf), "%s exception range %zu of %zu offset", + subj, i, chunk->ehranges.count); + + ehrange = &chunk->ehranges.entries[i]; + + if (!read_size_t(file, &ehrange->from, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &ehrange->to, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &ehrange->target, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &ehrange->slot, sizeof(uint32_t), subjbuf, errp)) + goto out; + } + } + + /* read 
variable info */ + if (flags & UC_FUNCTION_F_HAS_VARDBG) { + snprintf(subjbuf, sizeof(subjbuf), "%s variable scopes count", subj); + + if (!read_size_t(file, &chunk->debuginfo.variables.count, sizeof(uint32_t), subjbuf, errp)) + goto out; + + chunk->debuginfo.variables.entries = xcalloc( + sizeof(chunk->debuginfo.variables.entries[0]), + chunk->debuginfo.variables.count); + + for (i = 0; i < chunk->debuginfo.variables.count; i++) { + snprintf(subjbuf, sizeof(subjbuf), "%s variable scope %zu of %zu offset", + subj, i, chunk->debuginfo.variables.count); + + varrange = &chunk->debuginfo.variables.entries[i]; + + if (!read_size_t(file, &varrange->from, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &varrange->to, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &varrange->slot, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &varrange->nameidx, sizeof(uint32_t), subjbuf, errp)) + goto out; + } + + snprintf(subjbuf, sizeof(subjbuf), "%s variable names", subj); + + if (!read_vallist(file, &chunk->debuginfo.varnames, subjbuf, errp)) + goto out; + } + + /* read offset info */ + if (flags & UC_FUNCTION_F_HAS_OFFSETDBG) { + snprintf(subjbuf, sizeof(subjbuf), "%s source offsets", subj); + + if (!read_vector(file, &chunk->debuginfo.offsets, subjbuf, errp)) + goto out; + } + + return true; + +out: + uc_vallist_free(&chunk->debuginfo.varnames); + + free(chunk->entries); + free(chunk->ehranges.entries); + free(chunk->debuginfo.variables.entries); + + chunk->count = 0; + chunk->entries = NULL; + + chunk->ehranges.count = 0; + chunk->ehranges.entries = NULL; + + chunk->debuginfo.variables.count = 0; + chunk->debuginfo.variables.entries = NULL; + + return false; +} + +static bool +read_function(FILE *file, uc_program_t *program, size_t idx, char **errp) +{ + char subjbuf[64], *name = NULL; + uc_function_t *func = NULL; + uint32_t flags, u32; + + snprintf(subjbuf, sizeof(subjbuf), "function #%zu flags", idx); + + if (!read_u32(file, &flags, subjbuf, errp)) 
+ goto out; + + if (flags & UC_FUNCTION_F_HAS_NAME) { + snprintf(subjbuf, sizeof(subjbuf), "function #%zu name length", idx); + + if (!read_u32(file, &u32, subjbuf, errp)) + goto out; + + name = xalloc(u32); + + snprintf(subjbuf, sizeof(subjbuf), "function #%zu name", idx); + + if (!read_string(file, name, u32, subjbuf, errp)) + goto out; + } + + snprintf(subjbuf, sizeof(subjbuf), "function #%zu (%s) arg count and offset", idx, name ? name : "-"); + + func = (uc_function_t *)uc_program_function_new(program, name, 0); + func->arrow = (flags & UC_FUNCTION_F_IS_ARROW); + func->vararg = (flags & UC_FUNCTION_F_IS_VARARG); + func->strict = (flags & UC_FUNCTION_F_IS_STRICT); + + if (!read_size_t(file, &func->nargs, sizeof(uint16_t), subjbuf, errp) || + !read_size_t(file, &func->nupvals, sizeof(uint16_t), subjbuf, errp) || + !read_size_t(file, &func->srcpos, sizeof(uint32_t), subjbuf, errp)) + goto out; + + snprintf(subjbuf, sizeof(subjbuf), "function #%zu (%s) body", idx, name ? name : "-"); + + if (!read_chunk(file, &func->chunk, flags, subjbuf, errp)) + goto out; + + free(name); + + return true; + +out: + free(name); + + return false; +} + +uc_program_t * +uc_program_from_file(FILE *file, char **errp) +{ + uc_program_t *program = NULL; + uc_source_t *source = NULL; + uint32_t flags, nfuncs, i; + + if (!read_u32(file, &i, "file magic", errp)) + goto out; + + if (i != UC_PRECOMPILED_BYTECODE_MAGIC) { + xasprintf(errp, "Invalid file magic\n"); + goto out; + } + + if (!read_u32(file, &flags, "program flags", errp)) + goto out; + + source = read_sourceinfo(file, flags, errp); + + if (!source) + goto out; + + program = uc_program_new(source); + + uc_source_put(source); + + if (!read_vallist(file, &program->constants, "constants", errp)) + goto out; + + if (!read_u32(file, &nfuncs, "function count", errp)) + goto out; + + for (i = 0; i < nfuncs; i++) + if (!read_function(file, program, i, errp)) + goto out; + + return program; + +out: + uc_program_free(program); + + return 
NULL; +} + +uc_function_t * +uc_program_entry(uc_program_t *program) +{ + if (program->functions.prev == &program->functions) + return NULL; + + return ref_to_function(program->functions.prev); +} diff --git a/source.c b/source.c index aa73efd..3b35b70 100644 --- a/source.c +++ b/source.c @@ -16,6 +16,7 @@ #include #include +#include #include "ucode/source.h" @@ -124,7 +125,8 @@ uc_source_type_test(uc_source_t *source) union { char s[sizeof(uint32_t)]; uint32_t n; } buf; uc_source_type_t type = UC_SOURCE_TYPE_PLAIN; FILE *fp = source->fp; - int c; + size_t rlen; + int c = 0; if (fread(buf.s, 1, 2, fp) == 2 && !strncmp(buf.s, "#!", 2)) { source->off += 2; @@ -132,12 +134,8 @@ uc_source_type_test(uc_source_t *source) while ((c = fgetc(fp)) != EOF) { source->off++; - if (c == '\n') { - uc_source_line_update(source, source->off); - uc_source_line_next(source); - + if (c == '\n') break; - } } } else { @@ -145,6 +143,21 @@ uc_source_type_test(uc_source_t *source) fprintf(stderr, "Failed to rewind source buffer: %s\n", strerror(errno)); } + rlen = fread(buf.s, 1, 4, fp); + + if (rlen == 4 && buf.n == htobe32(UC_PRECOMPILED_BYTECODE_MAGIC)) { + type = UC_SOURCE_TYPE_PRECOMPILED; + } + else { + uc_source_line_update(source, source->off); + + if (c == '\n') + uc_source_line_next(source); + } + + if (fseek(fp, -(long)rlen, SEEK_CUR) == -1) + fprintf(stderr, "Failed to rewind source buffer: %s\n", strerror(errno)); + return type; } diff --git a/tests/cram/test_basic.t b/tests/cram/test_basic.t index 2c22131..d2a3605 100644 --- a/tests/cram/test_basic.t +++ b/tests/cram/test_basic.t @@ -25,6 +25,8 @@ check that ucode provides exepected help: -E Set global variables from given JSON file -x Disable given function -m Preload given module + -o Write precompiled byte code to given file + -O Write precompiled byte code to given file and strip debug information check that ucode prints greetings: -- cgit v1.2.3