diff options
-rw-r--r-- | CMakeLists.txt | 8 | ||||
-rw-r--r-- | chunk.c | 14 | ||||
-rw-r--r-- | compiler.c | 97 | ||||
-rw-r--r-- | include/ucode/chunk.h | 2 | ||||
-rw-r--r-- | include/ucode/compiler.h | 1 | ||||
-rw-r--r-- | include/ucode/program.h | 39 | ||||
-rw-r--r-- | include/ucode/source.h | 12 | ||||
-rw-r--r-- | include/ucode/types.h | 22 | ||||
-rw-r--r-- | include/ucode/util.h | 8 | ||||
-rw-r--r-- | include/ucode/vallist.h | 2 | ||||
-rw-r--r-- | lexer.c | 205 | ||||
-rw-r--r-- | lib.c | 4 | ||||
-rw-r--r-- | main.c | 40 | ||||
-rw-r--r-- | program.c | 791 | ||||
-rw-r--r-- | source.c | 95 | ||||
-rw-r--r-- | tests/cram/test_basic.t | 2 | ||||
-rw-r--r-- | types.c | 20 | ||||
-rw-r--r-- | vallist.c | 43 | ||||
-rw-r--r-- | vm.c | 67 |
19 files changed, 1213 insertions, 259 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index e668751..0621d51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,12 @@ ADD_DEFINITIONS(-Wmissing-declarations -Wno-error=unused-variable -Wno-unused-pa INCLUDE_DIRECTORIES(include) +OPTION(COMPILE_SUPPORT "Support compilation from source" ON) + +IF(NOT COMPILE_SUPPORT) + ADD_DEFINITIONS(-DNO_COMPILE) +ENDIF() + OPTION(FS_SUPPORT "Filesystem plugin support" ON) OPTION(MATH_SUPPORT "Math plugin support" ON) OPTION(UBUS_SUPPORT "Ubus plugin support" ON) @@ -51,7 +57,7 @@ IF(JSONC_FOUND) INCLUDE_DIRECTORIES(${JSONC_INCLUDE_DIRS}) ENDIF() -SET(UCODE_SOURCES lexer.c lib.c vm.c chunk.c vallist.c compiler.c source.c types.c) +SET(UCODE_SOURCES lexer.c lib.c vm.c chunk.c vallist.c compiler.c source.c types.c program.c) ADD_LIBRARY(libucode SHARED ${UCODE_SOURCES}) SET(SOVERSION 0 CACHE STRING "Override ucode library version") SET_TARGET_PROPERTIES(libucode PROPERTIES OUTPUT_NAME ucode SOVERSION ${SOVERSION}) @@ -45,7 +45,6 @@ uc_chunk_init(uc_chunk_t *chunk) chunk->debuginfo.variables.count = 0; chunk->debuginfo.variables.entries = NULL; - uc_vallist_init(&chunk->constants); uc_vallist_init(&chunk->debuginfo.varnames); } @@ -54,7 +53,6 @@ uc_chunk_free(uc_chunk_t *chunk) { uc_vector_clear(chunk); uc_vector_clear(&chunk->ehranges); - uc_vallist_free(&chunk->constants); uc_vector_clear(&chunk->debuginfo.offsets); uc_vector_clear(&chunk->debuginfo.variables); @@ -136,18 +134,6 @@ uc_chunk_pop(uc_chunk_t *chunk) } } -uc_value_t * -uc_chunk_get_constant(uc_chunk_t *chunk, size_t idx) -{ - return uc_vallist_get(&chunk->constants, idx); -} - -ssize_t -uc_chunk_add_constant(uc_chunk_t *chunk, uc_value_t *val) -{ - return uc_vallist_add(&chunk->constants, val); -} - size_t uc_chunk_debug_get_srcpos(uc_chunk_t *chunk, size_t off) { @@ -21,8 +21,11 @@ #include "ucode/chunk.h" #include "ucode/vm.h" /* I_* */ #include "ucode/source.h" +#include "ucode/program.h" #include "ucode/lib.h" /* uc_error_context_format() */ 
+#ifndef NO_COMPILE + static void uc_compiler_compile_unary(uc_compiler_t *compiler); static void uc_compiler_compile_binary(uc_compiler_t *compiler); static void uc_compiler_compile_delete(uc_compiler_t *compiler); @@ -113,14 +116,15 @@ uc_compiler_exprstack_is(uc_compiler_t *compiler, uc_exprflag_t flag) } static void -uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_source_t *source, bool strict) +uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_program_t *program, bool strict) { uc_value_t *varname = ucv_string_new("(callee)"); uc_function_t *fn; compiler->scope_depth = 0; - compiler->function = ucv_function_new(name, srcpos, source); + compiler->program = program; + compiler->function = uc_program_function_new(program, name, srcpos); compiler->locals.count = 0; compiler->locals.entries = NULL; @@ -154,9 +158,7 @@ uc_compiler_current_chunk(uc_compiler_t *compiler) static uc_source_t * uc_compiler_current_source(uc_compiler_t *compiler) { - uc_function_t *fn = (uc_function_t *)compiler->function; - - return fn->source; + return compiler->program->source; } __attribute__((format(printf, 3, 0))) static void @@ -481,8 +483,7 @@ uc_compiler_set_u32(uc_compiler_t *compiler, size_t off, uint32_t n) static size_t uc_compiler_emit_constant(uc_compiler_t *compiler, size_t srcpos, uc_value_t *val) { - uc_chunk_t *chunk = uc_compiler_current_chunk(compiler); - size_t cidx = uc_chunk_add_constant(chunk, val); + size_t cidx = uc_program_add_constant(compiler->program, val); uc_compiler_emit_insn(compiler, srcpos, I_LOAD); uc_compiler_emit_u32(compiler, 0, cidx); @@ -493,8 +494,7 @@ uc_compiler_emit_constant(uc_compiler_t *compiler, size_t srcpos, uc_value_t *va static size_t uc_compiler_emit_regexp(uc_compiler_t *compiler, size_t srcpos, uc_value_t *val) { - uc_chunk_t *chunk = uc_compiler_current_chunk(compiler); - size_t cidx = uc_chunk_add_constant(chunk, val); + size_t cidx = 
uc_program_add_constant(compiler->program, val); uc_compiler_emit_insn(compiler, srcpos, I_LREXP); uc_compiler_emit_u32(compiler, 0, cidx); @@ -1084,7 +1084,7 @@ uc_compiler_emit_variable_rw(uc_compiler_t *compiler, uc_value_t *varname, uc_to ((sub_insn & 0xff) << 24) | idx); } else { - idx = uc_chunk_add_constant(uc_compiler_current_chunk(compiler), varname); + idx = uc_program_add_constant(compiler->program, varname); insn = sub_insn ? I_UVAR : (type ? I_SVAR : I_LVAR); uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, insn); @@ -1132,7 +1132,7 @@ uc_compiler_compile_arrowfn(uc_compiler_t *compiler, uc_value_t *args, bool rest pos = compiler->parser->prev.pos; uc_compiler_init(&fncompiler, NULL, compiler->parser->prev.pos, - uc_compiler_current_source(compiler), + compiler->program, uc_compiler_is_strict(compiler)); fncompiler.parent = compiler; @@ -1193,8 +1193,7 @@ uc_compiler_compile_arrowfn(uc_compiler_t *compiler, uc_value_t *args, bool rest if (fn) uc_compiler_set_u32(compiler, load_off, - uc_chunk_add_constant(uc_compiler_current_chunk(compiler), - &fn->header)); + uc_program_add_constant(compiler->program, &fn->header)); return true; } @@ -1563,7 +1562,7 @@ uc_compiler_compile_function(uc_compiler_t *compiler) uc_compiler_init(&fncompiler, name ? 
ucv_string_get(name) : NULL, compiler->parser->prev.pos, - uc_compiler_current_source(compiler), + compiler->program, uc_compiler_is_strict(compiler)); fncompiler.parent = compiler; @@ -1633,8 +1632,7 @@ uc_compiler_compile_function(uc_compiler_t *compiler) if (fn) uc_compiler_set_u32(compiler, load_off, - uc_chunk_add_constant(uc_compiler_current_chunk(compiler), - &fn->header)); + uc_program_add_constant(compiler->program, &fn->header)); /* if a local variable of the same name already existed, overwrite its value * with the compiled function here */ @@ -2869,16 +2867,28 @@ uc_compiler_compile_declaration(uc_compiler_t *compiler) uc_compiler_parse_synchronize(compiler); } -uc_function_t * -uc_compile(uc_parse_config_t *config, uc_source_t *source, char **errp) +#endif /* NO_COMPILE */ + + +static uc_function_t * +uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, char **errp) { +#ifdef NO_COMPILE + if (errp) + xasprintf(errp, "Source code compilation not supported\n"); + + return NULL; +#else + uc_function_t *fn = NULL; uc_exprstack_t expr = { .token = TK_EOF }; uc_parser_t parser = { .config = config }; uc_compiler_t compiler = { .parser = &parser, .exprstack = &expr }; - uc_function_t *fn; + uc_program_t *prog; + + prog = uc_program_new(source); uc_lexer_init(&parser.lex, config, source); - uc_compiler_init(&compiler, "main", 0, source, + uc_compiler_init(&compiler, "main", 0, prog, config && config->strict_declarations); uc_compiler_parse_advance(&compiler); @@ -2899,4 +2909,51 @@ uc_compile(uc_parse_config_t *config, uc_source_t *source, char **errp) uc_lexer_free(&parser.lex); return fn; +#endif +} + +static uc_function_t * +uc_compile_from_bytecode(uc_parse_config_t *config, uc_source_t *source, char **errp) +{ + uc_function_t *fn = NULL; + uc_program_t *prog; + + prog = uc_program_from_file(source->fp, errp); + + if (prog) { + fn = uc_program_entry(prog); + + if (!fn) { + if (errp) + xasprintf(errp, "Program file contains no entry 
function\n"); + + uc_program_free(prog); + } + } + + return fn; +} + +uc_function_t * +uc_compile(uc_parse_config_t *config, uc_source_t *source, char **errp) +{ + uc_function_t *fn = NULL; + + switch (uc_source_type_test(source)) { + case UC_SOURCE_TYPE_PLAIN: + fn = uc_compile_from_source(config, source, errp); + break; + + case UC_SOURCE_TYPE_PRECOMPILED: + fn = uc_compile_from_bytecode(config, source, errp); + break; + + default: + if (errp) + xasprintf(errp, "Unrecognized source type\n"); + + break; + } + + return fn; } diff --git a/include/ucode/chunk.h b/include/ucode/chunk.h index 0005e3c..6804eeb 100644 --- a/include/ucode/chunk.h +++ b/include/ucode/chunk.h @@ -28,8 +28,6 @@ void uc_chunk_init(uc_chunk_t *chunk); void uc_chunk_free(uc_chunk_t *chunk); size_t uc_chunk_add(uc_chunk_t *chunk, uint8_t byte, size_t line); -ssize_t uc_chunk_add_constant(uc_chunk_t *chunk, uc_value_t *value); -uc_value_t *uc_chunk_get_constant(uc_chunk_t *chunk, size_t idx); void uc_chunk_pop(uc_chunk_t *chunk); size_t uc_chunk_debug_get_srcpos(uc_chunk_t *chunk, size_t off); diff --git a/include/ucode/compiler.h b/include/ucode/compiler.h index 04fc0ef..df242dc 100644 --- a/include/ucode/compiler.h +++ b/include/ucode/compiler.h @@ -116,6 +116,7 @@ typedef struct uc_compiler { uc_exprstack_t *exprstack; uc_value_t *function; uc_parser_t *parser; + uc_program_t *program; size_t scope_depth, current_srcpos, last_insn; } uc_compiler_t; diff --git a/include/ucode/program.h b/include/ucode/program.h new file mode 100644 index 0000000..9a5c553 --- /dev/null +++ b/include/ucode/program.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2022 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __PROGRAM_H_ +#define __PROGRAM_H_ + +#include "types.h" + + +uc_program_t *uc_program_new(uc_source_t *); + +void uc_program_free(uc_program_t *); + +uc_value_t *uc_program_function_new(uc_program_t *, const char *, size_t); +size_t uc_program_function_id(uc_program_t *, uc_value_t *); +uc_value_t *uc_program_function_load(uc_program_t *, size_t); + +uc_value_t *uc_program_get_constant(uc_program_t *, size_t); +ssize_t uc_program_add_constant(uc_program_t *, uc_value_t *); + +void uc_program_to_file(uc_program_t *, FILE *, bool); +uc_program_t *uc_program_from_file(FILE *file, char **); + +uc_function_t *uc_program_entry(uc_program_t *); + +#endif /* __PROGRAM_H_ */ diff --git a/include/ucode/source.h b/include/ucode/source.h index 3de7c93..ac0b487 100644 --- a/include/ucode/source.h +++ b/include/ucode/source.h @@ -25,6 +25,13 @@ #include "types.h" +#define UC_PRECOMPILED_BYTECODE_MAGIC 0x1b756362 /* <esc> 'u' 'c' 'b' */ + +typedef enum { + UC_SOURCE_TYPE_PLAIN = 0, + UC_SOURCE_TYPE_PRECOMPILED = 1, +} uc_source_type_t; + uc_source_t *uc_source_new_file(const char *path); uc_source_t *uc_source_new_buffer(const char *name, char *buf, size_t len); @@ -33,4 +40,9 @@ size_t uc_source_get_line(uc_source_t *source, size_t *offset); uc_source_t *uc_source_get(uc_source_t *source); void uc_source_put(uc_source_t *source); +uc_source_type_t uc_source_type_test(uc_source_t *source); + +void uc_source_line_next(uc_source_t *source); +void 
uc_source_line_update(uc_source_t *source, size_t off); + #endif /* __SOURCE_H_ */ diff --git a/include/ucode/types.h b/include/ucode/types.h index cbd03dd..66db5ea 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -90,7 +90,6 @@ uc_declare_vector(uc_offsetinfo_t, uint8_t); typedef struct { size_t count; uint8_t *entries; - uc_value_list_t constants; uc_ehranges_t ehranges; struct { uc_variables_t variables; @@ -148,14 +147,15 @@ typedef struct { char source[]; } uc_regexp_t; -typedef struct { +typedef struct uc_function { uc_value_t header; - bool arrow, vararg, strict; + bool arrow, vararg, strict, root; size_t nargs; size_t nupvals; size_t srcpos; uc_chunk_t chunk; - uc_source_t *source; + struct uc_program *program; + uc_weakref_t progref; char name[]; } uc_function_t; @@ -199,6 +199,15 @@ typedef struct { uc_declare_vector(uc_resource_types_t, uc_resource_type_t *); +/* Program structure definitions */ + +typedef struct uc_program { + uc_value_list_t constants; + uc_weakref_t functions; + uc_source_t *source; +} uc_program_t; + + /* Parser definitions */ typedef struct { @@ -275,6 +284,9 @@ struct uc_vm { void ucv_free(uc_value_t *, bool); void ucv_put(uc_value_t *); +void ucv_unref(uc_weakref_t *); +void ucv_ref(uc_weakref_t *, uc_weakref_t *); + uc_value_t *ucv_get(uc_value_t *uv); uc_type_t ucv_type(uc_value_t *); @@ -338,7 +350,7 @@ size_t ucv_object_length(uc_value_t *); : 0); \ entry##key = entry_next##key) -uc_value_t *ucv_function_new(const char *, size_t, uc_source_t *); +uc_value_t *ucv_function_new(const char *, size_t, uc_program_t *); size_t ucv_function_srcpos(uc_value_t *, size_t); uc_value_t *ucv_cfunction_new(const char *, uc_cfn_ptr_t); diff --git a/include/ucode/util.h b/include/ucode/util.h index 858a3fd..1ad13bd 100644 --- a/include/ucode/util.h +++ b/include/ucode/util.h @@ -71,8 +71,8 @@ /* "failsafe" utility functions */ -static inline void *xalloc(size_t size) { - void *ptr = calloc(1, size); +static inline void 
*xcalloc(size_t size, size_t nmemb) { + void *ptr = calloc(size, nmemb); if (!ptr) { fprintf(stderr, "Out of memory\n"); @@ -82,6 +82,10 @@ static inline void *xalloc(size_t size) { return ptr; } +static inline void *xalloc(size_t size) { + return xcalloc(1, size); +} + static inline void *xrealloc(void *ptr, size_t size) { ptr = realloc(ptr, size); diff --git a/include/ucode/vallist.h b/include/ucode/vallist.h index a1b33a5..f1c1437 100644 --- a/include/ucode/vallist.h +++ b/include/ucode/vallist.h @@ -38,7 +38,7 @@ typedef enum { TAG_DBL = 3, TAG_STR = 4, TAG_LSTR = 5, - TAG_PTR = 6 + TAG_FUNC = 6 } uc_value_type_t; uc_value_t *uc_number_parse(const char *buf, char **end); @@ -54,6 +54,8 @@ struct token { (((x) >= 'a') ? (10 + (x) - 'a') : \ (((x) >= 'A') ? (10 + (x) - 'A') : dec(x))) +#ifndef NO_COMPILE + static uc_token_t *parse_comment(uc_lexer_t *); static uc_token_t *parse_string(uc_lexer_t *); static uc_token_t *parse_regexp(uc_lexer_t *); @@ -165,60 +167,6 @@ static const struct keyword reserved_words[] = { }; -/* - * Stores the given codepoint as a utf8 multibyte sequence into the given - * output buffer and substracts the required amount of bytes from the given - * length pointer. - * - * Returns false if the multibyte sequence would not fit into the buffer, - * otherwise true. 
- */ - -bool -utf8enc(char **out, int *rem, int code) -{ - if (code >= 0 && code <= 0x7F) { - if (*rem < 1) - return false; - - *(*out)++ = code; (*rem)--; - - return true; - } - else if (code > 0 && code <= 0x7FF) { - if (*rem < 2) - return false; - - *(*out)++ = ((code >> 6) & 0x1F) | 0xC0; (*rem)--; - *(*out)++ = ( code & 0x3F) | 0x80; (*rem)--; - - return true; - } - else if (code > 0 && code <= 0xFFFF) { - if (*rem < 3) - return false; - - *(*out)++ = ((code >> 12) & 0x0F) | 0xE0; (*rem)--; - *(*out)++ = ((code >> 6) & 0x3F) | 0x80; (*rem)--; - *(*out)++ = ( code & 0x3F) | 0x80; (*rem)--; - - return true; - } - else if (code > 0 && code <= 0x10FFFF) { - if (*rem < 4) - return false; - - *(*out)++ = ((code >> 18) & 0x07) | 0xF0; (*rem)--; - *(*out)++ = ((code >> 12) & 0x3F) | 0x80; (*rem)--; - *(*out)++ = ((code >> 6) & 0x3F) | 0x80; (*rem)--; - *(*out)++ = ( code & 0x3F) | 0x80; (*rem)--; - - return true; - } - - return true; -} - /* length of the longest token in our lookup table */ #define UC_LEX_MAX_TOKEN_LEN 3 @@ -278,65 +226,14 @@ _buf_startswith(uc_lexer_t *lex, const char *str, size_t len) { #define buf_startswith(s, str) _buf_startswith(s, str, sizeof(str) - 1) -/* lineinfo is encoded in bytes: the most significant bit specifies whether - * to advance the line count by one or not, while the remaining 7 bits encode - * the amounts of bytes on the current line. - * - * If a line has more than 127 characters, the first byte will be set to - * 0xff (1 1111111) and subsequent bytes will encode the remaining characters - * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus - * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:1111111). - * - * The newline character itself is not counted, so an empty line is encoded as - * 0x80 (1:0000000). 
- */ - -static void -next_lineinfo(uc_lexer_t *lex) -{ - uc_lineinfo_t *lines = &lex->source->lineinfo; - - uc_vector_grow(lines); - lines->entries[lines->count++] = 0x80; -} - -static void -update_lineinfo(uc_lexer_t *lex, size_t off) -{ - uc_lineinfo_t *lines = &lex->source->lineinfo; - uint8_t *entry, n; - - entry = uc_vector_last(lines); - - if ((entry[0] & 0x7f) + off <= 0x7f) { - entry[0] += off; - } - else { - off -= (0x7f - (entry[0] & 0x7f)); - entry[0] |= 0x7f; - - while (off > 0) { - n = (off > 0x7f) ? 0x7f : off; - uc_vector_grow(lines); - entry = uc_vector_last(lines); - entry[1] = n; - off -= n; - lines->count++; - } - } -} - static void buf_consume(uc_lexer_t *lex, size_t len) { size_t i, linelen; - if (!lex->source->lineinfo.count) - next_lineinfo(lex); - for (i = 0, linelen = 0; i < len; i++) { if (lex->bufstart[i] == '\n') { - update_lineinfo(lex, linelen); - next_lineinfo(lex); + uc_source_line_update(lex->source, linelen); + uc_source_line_next(lex->source); linelen = 0; } @@ -346,7 +243,7 @@ buf_consume(uc_lexer_t *lex, size_t len) { } if (linelen) - update_lineinfo(lex, linelen); + uc_source_line_update(lex->source, linelen); lex->bufstart += len; lex->source->off += len; @@ -1120,38 +1017,6 @@ lex_step(uc_lexer_t *lex, FILE *fp) return NULL; } -static void -uc_lexer_skip_shebang(uc_lexer_t *lex) -{ - uc_source_t *source = lex->source; - FILE *fp = source->fp; - int c1, c2; - - c1 = fgetc(fp); - c2 = fgetc(fp); - - if (c1 == '#' && c2 == '!') { - next_lineinfo(lex); - - source->off += 2; - - while ((c1 = fgetc(fp)) != EOF) { - source->off++; - - if (c1 == '\n') { - update_lineinfo(lex, source->off); - next_lineinfo(lex); - - break; - } - } - } - else { - ungetc(c2, fp); - ungetc(c1, fp); - } -} - void uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source) { @@ -1187,10 +1052,6 @@ uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source) lex->state = UC_LEX_IDENTIFY_TOKEN; lex->block = STATEMENTS; } 
- - /* Skip any potential interpreter line */ - if (lex->source->off == 0) - uc_lexer_skip_shebang(lex); } void @@ -1274,3 +1135,59 @@ uc_lexer_is_keyword(uc_value_t *label) return false; } + +#endif /* NO_COMPILE */ + +/* + * Stores the given codepoint as a utf8 multibyte sequence into the given + * output buffer and substracts the required amount of bytes from the given + * length pointer. + * + * Returns false if the multibyte sequence would not fit into the buffer, + * otherwise true. + */ + +bool +utf8enc(char **out, int *rem, int code) +{ + if (code >= 0 && code <= 0x7F) { + if (*rem < 1) + return false; + + *(*out)++ = code; (*rem)--; + + return true; + } + else if (code > 0 && code <= 0x7FF) { + if (*rem < 2) + return false; + + *(*out)++ = ((code >> 6) & 0x1F) | 0xC0; (*rem)--; + *(*out)++ = ( code & 0x3F) | 0x80; (*rem)--; + + return true; + } + else if (code > 0 && code <= 0xFFFF) { + if (*rem < 3) + return false; + + *(*out)++ = ((code >> 12) & 0x0F) | 0xE0; (*rem)--; + *(*out)++ = ((code >> 6) & 0x3F) | 0x80; (*rem)--; + *(*out)++ = ( code & 0x3F) | 0x80; (*rem)--; + + return true; + } + else if (code > 0 && code <= 0x10FFFF) { + if (*rem < 4) + return false; + + *(*out)++ = ((code >> 18) & 0x07) | 0xF0; (*rem)--; + *(*out)++ = ((code >> 12) & 0x3F) | 0x80; (*rem)--; + *(*out)++ = ((code >> 6) & 0x3F) | 0x80; (*rem)--; + *(*out)++ = ( code & 0x3F) | 0x80; (*rem)--; + + return true; + } + + return true; +} @@ -2093,7 +2093,7 @@ uc_include(uc_vm_t *vm, size_t nargs) if (!closure) return NULL; - p = include_path(closure->function->source->filename, ucv_string_get(path)); + p = include_path(closure->function->program->source->filename, ucv_string_get(path)); if (!p) { uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, @@ -2505,7 +2505,7 @@ uc_sourcepath(uc_vm_t *vm, size_t nargs) continue; } - path = realpath(frame->closure->function->source->filename, NULL); + path = realpath(frame->closure->function->program->source->filename, NULL); break; } @@ -33,6 +33,7 @@ 
#include "ucode/lib.h" #include "ucode/vm.h" #include "ucode/source.h" +#include "ucode/program.h" static void @@ -52,7 +53,9 @@ print_usage(const char *app) " -e Set global variables from given JSON object\n" " -E Set global variables from given JSON file\n" " -x Disable given function\n" - " -m Preload given module\n", + " -m Preload given module\n" + " -o Write precompiled byte code to given file\n" + " -O Write precompiled byte code to given file and strip debug information\n", basename(app)); } @@ -75,7 +78,7 @@ register_variable(uc_value_t *scope, const char *key, uc_value_t *val) static int -compile(uc_vm_t *vm, uc_source_t *src) +compile(uc_vm_t *vm, uc_source_t *src, FILE *precompile, bool strip) { uc_value_t *res = NULL; uc_function_t *entry; @@ -91,6 +94,13 @@ compile(uc_vm_t *vm, uc_source_t *src) goto out; } + if (precompile) { + uc_program_to_file(entry->program, precompile, !strip); + uc_program_free(entry->program); + fclose(precompile); + goto out; + } + rc = uc_vm_execute(vm, entry, &res); switch (rc) { @@ -188,7 +198,9 @@ int main(int argc, char **argv) { uc_source_t *source = NULL, *envfile = NULL; + FILE *precompile = NULL; char *stdin = NULL, *c; + bool strip = false; uc_vm_t vm = { 0 }; uc_value_t *o, *p; int opt, rv = 0; @@ -219,7 +231,7 @@ main(int argc, char **argv) ucv_object_add(uc_vm_scope_get(&vm), "ARGV", o); /* parse options */ - while ((opt = getopt(argc, argv, "hlrtSRe:E:i:s:m:x:")) != -1) + while ((opt = getopt(argc, argv, "hlrtSRe:E:i:s:m:x:o:O:")) != -1) { switch (opt) { case 'h': @@ -354,6 +366,26 @@ main(int argc, char **argv) fprintf(stderr, "Unknown function %s specified\n", optarg); break; + + case 'o': + case 'O': + strip = (opt == 'O'); + + if (!strcmp(optarg, "-")) { + precompile = stdout; + } + else { + precompile = fopen(optarg, "wb"); + + if (!precompile) { + fprintf(stderr, "Unable to open output file %s: %s\n", + optarg, strerror(errno)); + + goto out; + } + } + + break; } } @@ -373,7 +405,7 @@ main(int argc, char 
**argv) goto out; } - rv = compile(&vm, source); + rv = compile(&vm, source, precompile, strip); out: uc_source_put(source); diff --git a/program.c b/program.c new file mode 100644 index 0000000..f3b90b4 --- /dev/null +++ b/program.c @@ -0,0 +1,791 @@ +/* + * Copyright (C) 2022 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <assert.h> +#include <errno.h> +#include <endian.h> + +#include "ucode/program.h" +#include "ucode/source.h" +#include "ucode/vallist.h" + + +uc_program_t * +uc_program_new(uc_source_t *source) +{ + uc_program_t *prog; + + prog = xalloc(sizeof(*prog)); + + prog->functions.next = &prog->functions; + prog->functions.prev = &prog->functions; + + prog->source = uc_source_get(source); + + uc_vallist_init(&prog->constants); + + return prog; +} + +static inline uc_function_t * +ref_to_function(uc_weakref_t *ref) +{ + return (uc_function_t *)((uintptr_t)ref - offsetof(uc_function_t, progref)); +} + +static inline uc_value_t * +ref_to_uv(uc_weakref_t *ref) +{ + return (uc_value_t *)((uintptr_t)ref - offsetof(uc_function_t, progref)); +} + +void +uc_program_free(uc_program_t *prog) +{ + uc_weakref_t *ref, *tmp; + uc_function_t *func; + + if (!prog) + return; + + for (ref = prog->functions.next, tmp = ref->next; ref != &prog->functions; ref = tmp, tmp = tmp->next) { + func = ref_to_function(ref); + func->program = NULL; + func->progref.next = NULL; + func->progref.prev = NULL; + + ucv_put(&func->header); + } + + uc_vallist_free(&prog->constants); + uc_source_put(prog->source); + free(prog); +} + +uc_value_t * +uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos) +{ + uc_function_t *func; + + func = (uc_function_t *)ucv_function_new(name, srcpos, prog); + func->root = (prog->functions.next == &prog->functions); + + ucv_ref(&prog->functions, &func->progref); + + return &func->header; +} + +size_t +uc_program_function_id(uc_program_t *prog, uc_value_t *func) +{ + uc_weakref_t *ref; + size_t i; + + for (ref = prog->functions.prev, i = 1; ref != &prog->functions; ref = ref->prev, i++) + if (ref_to_uv(ref) == func) + return i; + + return 0; +} + +uc_value_t * +uc_program_function_load(uc_program_t *prog, size_t id) +{ + uc_weakref_t *ref; + size_t i; + + for (ref = prog->functions.prev, i = 1; ref != &prog->functions; ref = ref->prev, i++) + 
if (i == id) + return ref_to_uv(ref); + + return NULL; +} + +uc_value_t * +uc_program_get_constant(uc_program_t *prog, size_t idx) +{ + return uc_vallist_get(&prog->constants, idx); +} + +ssize_t +uc_program_add_constant(uc_program_t *prog, uc_value_t *val) +{ + return uc_vallist_add(&prog->constants, val); +} + +static void +write_u16(size_t value, FILE *file) +{ + uint16_t n; + + if (sizeof(value) > sizeof(n)) + assert(value <= UINT16_MAX); + + n = htobe16((uint16_t)value); + + fwrite(&n, 1, sizeof(n), file); +} + +static void +write_u32(size_t value, FILE *file) +{ + uint32_t n; + + if (sizeof(value) > sizeof(n)) + assert(value <= UINT32_MAX); + + n = htobe32((uint32_t)value); + + fwrite(&n, 1, sizeof(n), file); +} + +static void +write_u64(uint64_t value, FILE *file) +{ + uint64_t n; + + if (sizeof(value) > sizeof(n)) + assert(value <= UINT64_MAX); + + n = htobe64((uint64_t)value); + + fwrite(&n, 1, sizeof(n), file); +} + +static void +_write_vector(size_t count, size_t itemsize, void *data, FILE *file) +{ + size_t pad = (~(count * itemsize) + 1) & (sizeof(uint32_t) - 1); + char z[sizeof(uint32_t) - 1] = { 0 }; + + write_u32(count, file); + fwrite(data, itemsize, count, file); + fwrite(z, 1, pad, file); +} + +#define write_vector(vec, file) \ + _write_vector((vec)->count, sizeof((vec)->entries[0]), (vec)->entries, file) + +#define write_string(str, file) \ + _write_vector(strlen(str) + 1, 1, str, file) + +static void +write_vallist(uc_value_list_t *vallist, FILE *file) +{ + size_t i; + + /* write index */ + write_u32(vallist->isize, file); + + for (i = 0; i < vallist->isize; i++) + write_u64(vallist->index[i], file); + + /* write data */ + write_u32(vallist->dsize, file); + fwrite(vallist->data, 1, vallist->dsize, file); +} + +enum { + UC_PROGRAM_F_DEBUG = (1 << 0), + UC_PROGRAM_F_SOURCEINFO = (1 << 1), + UC_PROGRAM_F_SOURCEBUF = (1 << 2), +}; + +enum { + UC_FUNCTION_F_IS_ARROW = (1 << 0), + UC_FUNCTION_F_IS_VARARG = (1 << 1), + UC_FUNCTION_F_IS_STRICT = (1 << 
2), + UC_FUNCTION_F_HAS_EXCEPTIONS = (1 << 3), + UC_FUNCTION_F_HAS_NAME = (1 << 4), + UC_FUNCTION_F_HAS_VARDBG = (1 << 5), + UC_FUNCTION_F_HAS_OFFSETDBG = (1 << 6), +}; + +static void +write_chunk(uc_chunk_t *chunk, FILE *file, uint32_t flags) +{ + size_t i; + + /* write bytecode data */ + write_vector(chunk, file); + + /* write exception ranges */ + if (flags & UC_FUNCTION_F_HAS_EXCEPTIONS) { + write_u32(chunk->ehranges.count, file); + + for (i = 0; i < chunk->ehranges.count; i++) { + write_u32(chunk->ehranges.entries[i].from, file); + write_u32(chunk->ehranges.entries[i].to, file); + write_u32(chunk->ehranges.entries[i].target, file); + write_u32(chunk->ehranges.entries[i].slot, file); + } + } + + /* write variable info */ + if (flags & UC_FUNCTION_F_HAS_VARDBG) { + write_u32(chunk->debuginfo.variables.count, file); + + for (i = 0; i < chunk->debuginfo.variables.count; i++) { + write_u32(chunk->debuginfo.variables.entries[i].from, file); + write_u32(chunk->debuginfo.variables.entries[i].to, file); + write_u32(chunk->debuginfo.variables.entries[i].slot, file); + write_u32(chunk->debuginfo.variables.entries[i].nameidx, file); + } + + write_vallist(&chunk->debuginfo.varnames, file); + } + + /* write offset info */ + if (flags & UC_FUNCTION_F_HAS_OFFSETDBG) + write_vector(&chunk->debuginfo.offsets, file); +} + +static void +write_function(uc_function_t *func, FILE *file, bool debug) +{ + uint32_t flags = 0; + + if (func->arrow) + flags |= UC_FUNCTION_F_IS_ARROW; + + if (func->vararg) + flags |= UC_FUNCTION_F_IS_VARARG; + + if (func->strict) + flags |= UC_FUNCTION_F_IS_STRICT; + + if (func->chunk.ehranges.count) + flags |= UC_FUNCTION_F_HAS_EXCEPTIONS; + + if (debug && func->name[0]) + flags |= UC_FUNCTION_F_HAS_NAME; + + if (debug && func->chunk.debuginfo.variables.count) + flags |= UC_FUNCTION_F_HAS_VARDBG; + + if (debug && func->chunk.debuginfo.offsets.count) + flags |= UC_FUNCTION_F_HAS_OFFSETDBG; + + write_u32(flags, file); + + if (flags & UC_FUNCTION_F_HAS_NAME) 
+ write_string(func->name, file); + + write_u16(func->nargs, file); + write_u16(func->nupvals, file); + write_u32(func->srcpos, file); + + write_chunk(&func->chunk, file, flags); +} + +void +uc_program_to_file(uc_program_t *prog, FILE *file, bool debug) +{ + uint32_t flags = 0; + uc_weakref_t *ref; + size_t i; + + if (debug) + flags |= UC_PROGRAM_F_DEBUG; + + if (debug && prog->source) { + flags |= UC_PROGRAM_F_SOURCEINFO; + + if (prog->source->buffer) + flags |= UC_PROGRAM_F_SOURCEBUF; + } + + /* magic word + flags */ + write_u32(UC_PRECOMPILED_BYTECODE_MAGIC, file); + write_u32(flags, file); + + if (flags & UC_PROGRAM_F_SOURCEINFO) { + /* write source file name */ + write_string(prog->source->filename, file); + + /* include source buffer if program was compiled from stdin */ + if (flags & UC_PROGRAM_F_SOURCEBUF) + write_string(prog->source->buffer, file); + + /* write lineinfo data */ + write_vector(&prog->source->lineinfo, file); + } + + /* write constants */ + write_vallist(&prog->constants, file); + + /* write program sections */ + for (i = 0, ref = prog->functions.prev; ref != &prog->functions; ref = ref->prev) + i++; + + write_u32(i, file); + + for (ref = prog->functions.prev; ref != &prog->functions; ref = ref->prev) + write_function(ref_to_function(ref), file, debug); +} + +static bool +read_error(FILE *file, char **errp, const char *subject, size_t rlen, size_t len) +{ + const char *reason; + + if (feof(file)) + reason = "Premature EOF"; + else + reason = strerror(errno); + + if (errp) + xasprintf(errp, + "%s while reading %s at offset %ld, got %zu of %zu bytes\n", + reason, subject, ftell(file) - rlen, rlen, len); + + return false; +} + +static bool +skip_padding(FILE *file, size_t len, char **errp) +{ + size_t pad = (~len + 1) & (sizeof(uint32_t) - 1), rlen; + char buf[sizeof(uint32_t) - 1]; + + if (pad != 0) { + rlen = fread(buf, 1, pad, file); + + if (rlen != pad) + return read_error(file, errp, "padding", rlen, pad); + } + + return true; +} + +static 
bool +read_u32(FILE *file, uint32_t *n, const char *subj, char **errp) +{ + size_t rlen = fread(n, 1, sizeof(*n), file); + + if (rlen != sizeof(*n)) { + *n = 0; + + return read_error(file, errp, subj ? subj : "uint32_t", rlen, sizeof(*n)); + } + + *n = be32toh(*n); + + return true; +} + +static bool +read_u64(FILE *file, uint64_t *n, const char *subj, char **errp) +{ + size_t rlen = fread(n, 1, sizeof(*n), file); + + if (rlen != sizeof(*n)) { + *n = 0; + + return read_error(file, errp, subj ? subj : "uint64_t", rlen, sizeof(*n)); + } + + *n = be64toh(*n); + + return true; +} + +/* Read a big-endian unsigned integer of `size` bytes (1, 2, 4 or 8) from + * `file` and store it widened to size_t in `*n`. On a short read `*n` is + * zeroed, an error message is stored through `errp` and false is returned. */ +static bool +read_size_t(FILE *file, size_t *n, size_t size, const char *subj, char **errp) +{ + union { uint8_t u8; uint16_t u16; uint32_t u32; uint64_t u64; } nval; + size_t rlen; + + rlen = fread(&nval.u64, 1, size, file); + + if (rlen != size) { + *n = 0; + + if (!subj) { + switch (size) { + case 1: subj = "uint8_t"; break; + case 2: subj = "uint16_t"; break; + case 4: subj = "uint32_t"; break; + case 8: subj = "uint64_t"; break; + } + } + + /* report the requested width, not the size of the scratch union */ + return read_error(file, errp, subj, rlen, size); + } + + switch (size) { + case 1: *n = (size_t) nval.u8; break; + case 2: *n = (size_t)be16toh(nval.u16); break; + case 4: *n = (size_t)be32toh(nval.u32); break; + case 8: *n = (size_t)be64toh(nval.u64); break; + } + + return true; +} + +static bool +_read_vector(FILE *file, void *ptr, size_t itemsize, const char *subj, char **errp) +{ + struct { size_t count; void *data; } *vec = ptr; + size_t rlen, len; + char subjbuf[64]; + + snprintf(subjbuf, sizeof(subjbuf), "%s vector size", subj); + + if (!read_size_t(file, &vec->count, sizeof(uint32_t), subjbuf, errp)) + return false; + + vec->data = xcalloc(vec->count, itemsize); + + len = vec->count; + rlen = fread(vec->data, itemsize, len, file); + + if (rlen != len) { + free(vec->data); + + vec->count = 0; + vec->data = NULL; + + snprintf(subjbuf, sizeof(subjbuf), "%s vector data", subj); + + return read_error(file, errp, subjbuf, rlen * itemsize, 
len * itemsize); + } + + return skip_padding(file, vec->count * itemsize, errp); +} + +#define read_vector(file, vec, subj, errp) \ + _read_vector(file, vec, sizeof((vec)->entries[0]), subj, errp) + +static bool +read_string(FILE *file, char *dst, size_t len, const char *subj, char **errp) +{ + size_t rlen; + + rlen = fread(dst, 1, len, file); + + if (rlen != len) + return read_error(file, errp, subj, rlen, len); + + return skip_padding(file, len, errp); +} + +/* Read a serialized value list from `file` into `vallist`: a 32-bit index + * size followed by that many 64-bit index entries, then a 32-bit data size + * followed by the padded data blob. On failure both arrays are freed, the + * list is reset to empty and false is returned. */ +static bool +read_vallist(FILE *file, uc_value_list_t *vallist, const char *subj, char **errp) +{ + char subjbuf[64]; + size_t i; + + /* read index */ + snprintf(subjbuf, sizeof(subjbuf), "%s index size", subj); + + if (!read_size_t(file, &vallist->isize, sizeof(uint32_t), subjbuf, errp)) + goto out; + + vallist->index = xcalloc(sizeof(vallist->index[0]), vallist->isize); + + for (i = 0; i < vallist->isize; i++) { + snprintf(subjbuf, sizeof(subjbuf), "%s index entry %zu of %zu", subj, i, vallist->isize); + + if (!read_u64(file, &vallist->index[i], subjbuf, errp)) + goto out; + } + + /* read data */ + snprintf(subjbuf, sizeof(subjbuf), "%s data size", subj); + + if (!read_size_t(file, &vallist->dsize, sizeof(uint32_t), subjbuf, errp)) + goto out; + + vallist->data = xalloc(vallist->dsize); + + snprintf(subjbuf, sizeof(subjbuf), "%s data", subj); + + /* pass the composed label so a failure names the data part */ + if (!read_string(file, vallist->data, vallist->dsize, subjbuf, errp)) + goto out; + + return true; + +out: + free(vallist->index); + free(vallist->data); + + vallist->isize = 0; + vallist->index = NULL; + + vallist->dsize = 0; + vallist->data = NULL; + + return false; +} + +static uc_source_t * +read_sourceinfo(FILE *file, uint32_t flags, char **errp) +{ + char *path = NULL, *code = NULL; + uc_source_t *source = NULL; + size_t len; + + if (flags & UC_PROGRAM_F_SOURCEINFO) { + if (!read_size_t(file, &len, sizeof(uint32_t), "sourceinfo filename length", errp)) + goto out; + + path = xalloc(len); + + if (!read_string(file, path, len, "sourceinfo filename", errp)) + goto 
out; + + if (flags & UC_PROGRAM_F_SOURCEBUF) { + if (!read_size_t(file, &len, sizeof(uint32_t), "sourceinfo code buffer length", errp)) + goto out; + + code = xalloc(len); + + if (!read_string(file, code, len, "sourceinfo code buffer data", errp)) + goto out; + + source = uc_source_new_buffer(path, code, len); + + /* uc_source_put() frees source->buffer; presumably that is this + * buffer (confirm in uc_source_new_buffer), so it must not be + * freed a second time at out: */ + if (source) + code = NULL; + } + else { + source = uc_source_new_file(path); + + if (!source) { + fprintf(stderr, "Unable to open source file %s: %s\n", path, strerror(errno)); + /* heap copy, as in the no-source branch below: the buffer gets freed later */ + source = uc_source_new_buffer(path, xstrdup(""), 0); + } + } + + if (!read_vector(file, &source->lineinfo, "sourceinfo lineinfo", errp)) { + uc_source_put(source); + source = NULL; + goto out; + } + } + else { + source = uc_source_new_buffer("[no source]", xstrdup(""), 0); + } + +out: + free(path); + free(code); + + return source; +} + +static bool +read_chunk(FILE *file, uc_chunk_t *chunk, uint32_t flags, const char *subj, char **errp) +{ + uc_varrange_t *varrange; + uc_ehrange_t *ehrange; + char subjbuf[192]; + size_t i; + + /* read bytecode data */ + snprintf(subjbuf, sizeof(subjbuf), "%s byte code", subj); + + if (!read_vector(file, chunk, subjbuf, errp)) + goto out; + + /* read exception ranges */ + if (flags & UC_FUNCTION_F_HAS_EXCEPTIONS) { + snprintf(subjbuf, sizeof(subjbuf), "%s exception ranges count", subj); + + if (!read_size_t(file, &chunk->ehranges.count, sizeof(uint32_t), subjbuf, errp)) + goto out; + + chunk->ehranges.entries = xcalloc( + sizeof(chunk->ehranges.entries[0]), + chunk->ehranges.count); + + for (i = 0; i < chunk->ehranges.count; i++) { + snprintf(subjbuf, sizeof(subjbuf), "%s exception range %zu of %zu offset", + subj, i, chunk->ehranges.count); + + ehrange = &chunk->ehranges.entries[i]; + + if (!read_size_t(file, &ehrange->from, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &ehrange->to, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &ehrange->target, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &ehrange->slot, sizeof(uint32_t), subjbuf, errp)) + goto out; 
+ } + } + + /* read variable info */ + if (flags & UC_FUNCTION_F_HAS_VARDBG) { + snprintf(subjbuf, sizeof(subjbuf), "%s variable scopes count", subj); + + if (!read_size_t(file, &chunk->debuginfo.variables.count, sizeof(uint32_t), subjbuf, errp)) + goto out; + + chunk->debuginfo.variables.entries = xcalloc( + sizeof(chunk->debuginfo.variables.entries[0]), + chunk->debuginfo.variables.count); + + for (i = 0; i < chunk->debuginfo.variables.count; i++) { + snprintf(subjbuf, sizeof(subjbuf), "%s variable scope %zu of %zu offset", + subj, i, chunk->debuginfo.variables.count); + + varrange = &chunk->debuginfo.variables.entries[i]; + + if (!read_size_t(file, &varrange->from, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &varrange->to, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &varrange->slot, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &varrange->nameidx, sizeof(uint32_t), subjbuf, errp)) + goto out; + } + + snprintf(subjbuf, sizeof(subjbuf), "%s variable names", subj); + + if (!read_vallist(file, &chunk->debuginfo.varnames, subjbuf, errp)) + goto out; + } + + /* read offset info */ + if (flags & UC_FUNCTION_F_HAS_OFFSETDBG) { + snprintf(subjbuf, sizeof(subjbuf), "%s source offsets", subj); + + if (!read_vector(file, &chunk->debuginfo.offsets, subjbuf, errp)) + goto out; + } + + return true; + +out: + uc_vallist_free(&chunk->debuginfo.varnames); + + free(chunk->entries); + free(chunk->ehranges.entries); + free(chunk->debuginfo.variables.entries); + + chunk->count = 0; + chunk->entries = NULL; + + chunk->ehranges.count = 0; + chunk->ehranges.entries = NULL; + + chunk->debuginfo.variables.count = 0; + chunk->debuginfo.variables.entries = NULL; + + return false; +} + +static bool +read_function(FILE *file, uc_program_t *program, size_t idx, char **errp) +{ + char subjbuf[64], *name = NULL; + uc_function_t *func = NULL; + uint32_t flags, u32; + + snprintf(subjbuf, sizeof(subjbuf), "function #%zu flags", idx); + + if (!read_u32(file, 
&flags, subjbuf, errp)) + goto out; + + if (flags & UC_FUNCTION_F_HAS_NAME) { + snprintf(subjbuf, sizeof(subjbuf), "function #%zu name length", idx); + + if (!read_u32(file, &u32, subjbuf, errp)) + goto out; + + name = xalloc(u32); + + snprintf(subjbuf, sizeof(subjbuf), "function #%zu name", idx); + + if (!read_string(file, name, u32, subjbuf, errp)) + goto out; + } + + snprintf(subjbuf, sizeof(subjbuf), "function #%zu (%s) arg count and offset", idx, name ? name : "-"); + + func = (uc_function_t *)uc_program_function_new(program, name, 0); + func->arrow = (flags & UC_FUNCTION_F_IS_ARROW); + func->vararg = (flags & UC_FUNCTION_F_IS_VARARG); + func->strict = (flags & UC_FUNCTION_F_IS_STRICT); + + if (!read_size_t(file, &func->nargs, sizeof(uint16_t), subjbuf, errp) || + !read_size_t(file, &func->nupvals, sizeof(uint16_t), subjbuf, errp) || + !read_size_t(file, &func->srcpos, sizeof(uint32_t), subjbuf, errp)) + goto out; + + snprintf(subjbuf, sizeof(subjbuf), "function #%zu (%s) body", idx, name ? 
name : "-"); + + if (!read_chunk(file, &func->chunk, flags, subjbuf, errp)) + goto out; + + free(name); + + return true; + +out: + free(name); + + return false; +} + +/* Load a precompiled program from `file`: checks the magic word, reads the + * program flags and source info, then the constant list and each function. + * Returns the new program, or NULL on failure after releasing the partially + * built program; an error message is stored through `errp` when it is set. */ +uc_program_t * +uc_program_from_file(FILE *file, char **errp) +{ + uc_program_t *program = NULL; + uc_source_t *source = NULL; + uint32_t flags, nfuncs, i; + + if (!read_u32(file, &i, "file magic", errp)) + goto out; + + if (i != UC_PRECOMPILED_BYTECODE_MAGIC) { + /* errp is optional, mirror the guard used by read_error() */ + if (errp) + xasprintf(errp, "Invalid file magic\n"); + + goto out; + } + + if (!read_u32(file, &flags, "program flags", errp)) + goto out; + + source = read_sourceinfo(file, flags, errp); + + if (!source) + goto out; + + program = uc_program_new(source); + + uc_source_put(source); + + if (!read_vallist(file, &program->constants, "constants", errp)) + goto out; + + if (!read_u32(file, &nfuncs, "function count", errp)) + goto out; + + for (i = 0; i < nfuncs; i++) + if (!read_function(file, program, i, errp)) + goto out; + + return program; + +out: + uc_program_free(program); + + return NULL; +} + +uc_function_t * +uc_program_entry(uc_program_t *program) +{ + if (program->functions.prev == &program->functions) + return NULL; + + return ref_to_function(program->functions.prev); +} @@ -15,6 +15,8 @@ */ #include <string.h> +#include <errno.h> +#include <endian.h> #include "ucode/source.h" @@ -116,3 +118,96 @@ uc_source_put(uc_source_t *source) free(source->buffer); free(source); } + +uc_source_type_t +uc_source_type_test(uc_source_t *source) +{ + union { char s[sizeof(uint32_t)]; uint32_t n; } buf; + uc_source_type_t type = UC_SOURCE_TYPE_PLAIN; + FILE *fp = source->fp; + size_t rlen; + int c = 0; + + if (fread(buf.s, 1, 2, fp) == 2 && !strncmp(buf.s, "#!", 2)) { + source->off += 2; + + while ((c = fgetc(fp)) != EOF) { + source->off++; + + if (c == '\n') + break; + } + } + else { + if (fseek(fp, 0L, SEEK_SET) == -1) + fprintf(stderr, "Failed to rewind source buffer: %s\n", strerror(errno)); + } + + rlen = fread(buf.s, 1, 4, fp); + + if (rlen == 4 && buf.n == 
htobe32(UC_PRECOMPILED_BYTECODE_MAGIC)) { + type = UC_SOURCE_TYPE_PRECOMPILED; + } + else { + uc_source_line_update(source, source->off); + + if (c == '\n') + uc_source_line_next(source); + } + + if (fseek(fp, -(long)rlen, SEEK_CUR) == -1) + fprintf(stderr, "Failed to rewind source buffer: %s\n", strerror(errno)); + + return type; +} + +/* lineinfo is encoded in bytes: the most significant bit specifies whether + * to advance the line count by one or not, while the remaining 7 bits encode + * the number of bytes on the current line. + * + * If a line has more than 127 characters, the first byte will be set to + * 0xff (1:1111111) and subsequent bytes will encode the remaining characters + * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus + * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:0010011). + * + * The newline character itself is not counted, so an empty line is encoded as + * 0x80 (1:0000000). + */ + +void +uc_source_line_next(uc_source_t *source) +{ + uc_lineinfo_t *lines = &source->lineinfo; + + uc_vector_grow(lines); + lines->entries[lines->count++] = 0x80; +} + +void +uc_source_line_update(uc_source_t *source, size_t off) +{ + uc_lineinfo_t *lines = &source->lineinfo; + uint8_t *entry, n; + + if (!lines->count) + uc_source_line_next(source); + + entry = uc_vector_last(lines); + + if ((entry[0] & 0x7f) + off <= 0x7f) { + entry[0] += off; + } + else { + off -= (0x7f - (entry[0] & 0x7f)); + entry[0] |= 0x7f; + + while (off > 0) { + n = (off > 0x7f) ? 
0x7f : off; + uc_vector_grow(lines); + entry = uc_vector_last(lines); + entry[1] = n; + off -= n; + lines->count++; + } + } +} diff --git a/tests/cram/test_basic.t b/tests/cram/test_basic.t index 2c22131..d2a3605 100644 --- a/tests/cram/test_basic.t +++ b/tests/cram/test_basic.t @@ -25,6 +25,8 @@ check that ucode provides exepected help: -E Set global variables from given JSON file -x Disable given function -m Preload given module + -o Write precompiled byte code to given file + -O Write precompiled byte code to given file and strip debug information check that ucode prints greetings: @@ -26,6 +26,7 @@ #include "ucode/types.h" #include "ucode/util.h" #include "ucode/vm.h" +#include "ucode/program.h" uc_type_t ucv_type(uc_value_t *uv) @@ -60,14 +61,14 @@ ucv_typename(uc_value_t *uv) return "unknown"; } -static void +void ucv_unref(uc_weakref_t *ref) { ref->prev->next = ref->next; ref->next->prev = ref->prev; } -static void +void ucv_ref(uc_weakref_t *head, uc_weakref_t *item) { item->next = head->next; @@ -238,8 +239,15 @@ ucv_free(uc_value_t *uv, bool retain) case UC_FUNCTION: function = (uc_function_t *)uv; + + if (function->program) { + ucv_unref(&function->progref); + + if (function->root) + uc_program_free(function->program); + } + uc_chunk_free(&function->chunk); - uc_source_put(function->source); break; case UC_CLOSURE: @@ -942,7 +950,7 @@ ucv_object_length(uc_value_t *uv) uc_value_t * -ucv_function_new(const char *name, size_t srcpos, uc_source_t *source) +ucv_function_new(const char *name, size_t srcpos, uc_program_t *program) { size_t namelen = 0; uc_function_t *fn; @@ -960,7 +968,7 @@ ucv_function_new(const char *name, size_t srcpos, uc_source_t *source) fn->nargs = 0; fn->nupvals = 0; fn->srcpos = srcpos; - fn->source = uc_source_get(source); + fn->program = program; fn->vararg = false; uc_chunk_init(&fn->chunk); @@ -1020,6 +1028,8 @@ ucv_closure_new(uc_vm_t *vm, uc_function_t *function, bool arrow_fn) if (vm) ucv_ref(&vm->values, &closure->ref); + 
ucv_get(&function->header); + return &closure->header; } @@ -24,6 +24,7 @@ #include "ucode/util.h" #include "ucode/chunk.h" +#include "ucode/program.h" #include "ucode/vallist.h" #include "ucode/vm.h" @@ -271,17 +272,6 @@ uc_vallist_init(uc_value_list_t *list) void uc_vallist_free(uc_value_list_t *list) { - uc_value_t *o; - size_t i; - - for (i = 0; i < list->isize; i++) { - if (TAG_GET_TYPE(list->index[i]) == TAG_PTR) { - o = uc_vallist_get(list, i); - ucv_put(o); - ucv_put(o); - } - } - free(list->index); free(list->data); uc_vallist_init(list); @@ -476,22 +466,13 @@ find_str(uc_value_list_t *list, const char *s, size_t slen) } static void -add_ptr(uc_value_list_t *list, void *ptr) +add_func(uc_value_list_t *list, uc_function_t *func) { - size_t sz = TAG_ALIGN(sizeof(ptr)); + size_t id = uc_program_function_id(func->program, &func->header); - if ((TAG_TYPE)list->dsize + sz > TAG_MASK) { - fprintf(stderr, "Constant data too large\n"); - abort(); - } - - list->data = xrealloc(list->data, list->dsize + sz); - - memset(list->data + list->dsize, 0, sz); - memcpy(list->data + list->dsize, &ptr, sizeof(ptr)); + assert(id != 0 && TAG_FIT_NV(id)); - list->index[list->isize++] = (uint64_t)(TAG_PTR | (list->dsize << TAG_BITS)); - list->dsize += sz; + list->index[list->isize++] = (TAG_TYPE)(TAG_FUNC | TAG_SET_NV(id)); } ssize_t @@ -545,7 +526,7 @@ uc_vallist_add(uc_value_list_t *list, uc_value_t *value) break; case UC_FUNCTION: - add_ptr(list, value); + add_func(list, (uc_function_t *)value); break; default: @@ -564,10 +545,15 @@ uc_vallist_type(uc_value_list_t *list, size_t idx) return TAG_GET_TYPE(list->index[idx]); } +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + uc_value_t * uc_vallist_get(uc_value_list_t *list, size_t idx) { char str[sizeof(TAG_TYPE)]; + uc_program_t *program; size_t n, len; switch (uc_vallist_type(list, idx)) { @@ -605,11 +591,10 @@ 
uc_vallist_get(uc_value_list_t *list, size_t idx) return ucv_string_new_length(list->data + TAG_GET_OFFSET(list->index[idx]) + sizeof(uint32_t), len); - case TAG_PTR: - if (TAG_GET_OFFSET(list->index[idx]) + sizeof(void *) > list->dsize) - return NULL; + case TAG_FUNC: + program = container_of(list, uc_program_t, constants); - return ucv_get(*(uc_value_t **)(list->data + TAG_GET_OFFSET(list->index[idx]))); + return uc_program_function_load(program, TAG_GET_NV(list->index[idx])); default: return NULL; @@ -24,6 +24,7 @@ #include "ucode/vm.h" #include "ucode/compiler.h" +#include "ucode/program.h" #include "ucode/lib.h" /* uc_error_context_format() */ #undef __insn @@ -201,16 +202,28 @@ uc_vm_frame_chunk(uc_callframe_t *frame) return frame->closure ? &frame->closure->function->chunk : NULL; } +static uc_program_t * +uc_vm_frame_program(uc_callframe_t *frame) +{ + return frame->closure ? frame->closure->function->program : NULL; +} + +static uc_source_t * +uc_vm_frame_source(uc_callframe_t *frame) +{ + return frame->closure ? 
frame->closure->function->program->source : NULL; +} + static uc_callframe_t * uc_vm_current_frame(uc_vm_t *vm) { return uc_vector_last(&vm->callframes); } -static uc_chunk_t * -uc_vm_current_chunk(uc_vm_t *vm) +static uc_program_t * +uc_vm_current_program(uc_vm_t *vm) { - return uc_vm_frame_chunk(uc_vm_current_frame(vm)); + return uc_vm_frame_program(uc_vm_current_frame(vm)); } static bool @@ -319,18 +332,6 @@ uc_vm_frame_dump(uc_vm_t *vm, uc_callframe_t *frame) uc_vm_format_val(vm, frame->ctx)); if (chunk) { - fprintf(stderr, " |- %zu constants\n", - chunk->constants.isize); - - for (i = 0; i < chunk->constants.isize; i++) { - v = uc_chunk_get_constant(chunk, i); - - fprintf(stderr, " | [%zu] %s\n", - i, uc_vm_format_val(vm, v)); - - ucv_put(v); - } - closure = frame->closure; function = closure->function; @@ -591,18 +592,20 @@ uc_dump_insn(uc_vm_t *vm, uint8_t *pos, uc_vm_insn_t insn) uc_chunk_t *chunk = uc_vm_frame_chunk(frame); uc_stringbuf_t *buf = NULL; uc_value_t *cnst = NULL; + uc_source_t *source; size_t srcpos; srcpos = ucv_function_srcpos(&frame->closure->function->header, pos - chunk->entries); + source = uc_vm_frame_source(frame); - if (last_srcpos == 0 || last_source != frame->closure->function->source || srcpos != last_srcpos) { + if (last_srcpos == 0 || last_source != source || srcpos != last_srcpos) { buf = xprintbuf_new(); - uc_source_context_format(buf, frame->closure->function->source, srcpos, true); + uc_source_context_format(buf, source, srcpos, true); fwrite(buf->buf, 1, printbuf_length(buf), stderr); printbuf_free(buf); - last_source = frame->closure->function->source; + last_source = source; last_srcpos = srcpos; } @@ -649,7 +652,7 @@ uc_dump_insn(uc_vm_t *vm, uint8_t *pos, uc_vm_insn_t insn) case I_LOAD: case I_LVAR: case I_SVAR: - cnst = uc_chunk_get_constant(uc_vm_frame_chunk(uc_vector_last(&vm->callframes)), vm->arg.u32); + cnst = uc_program_get_constant(uc_vm_frame_program(uc_vector_last(&vm->callframes)), vm->arg.u32); 
fprintf(stderr, "\t; %s", cnst ? uc_vm_format_val(vm, cnst) : "(?)"); @@ -676,7 +679,7 @@ uc_dump_insn(uc_vm_t *vm, uint8_t *pos, uc_vm_insn_t insn) case I_UVAR: if (!cnst) - cnst = uc_chunk_get_constant(uc_vm_frame_chunk(uc_vector_last(&vm->callframes)), vm->arg.u32 & 0x00ffffff); + cnst = uc_program_get_constant(uc_vm_frame_program(uc_vector_last(&vm->callframes)), vm->arg.u32 & 0x00ffffff); fprintf(stderr, "\t; %s (%s)", cnst ? uc_vm_format_val(vm, cnst) : "(?)", @@ -807,6 +810,7 @@ uc_vm_capture_stacktrace(uc_vm_t *vm, size_t i) uc_value_t *stacktrace, *entry, *last = NULL; uc_function_t *function; uc_callframe_t *frame; + uc_source_t *source; size_t off, srcpos; char *name; @@ -818,12 +822,13 @@ uc_vm_capture_stacktrace(uc_vm_t *vm, size_t i) if (frame->closure) { function = frame->closure->function; + source = function->program->source; off = (frame->ip - uc_vm_frame_chunk(frame)->entries) - 1; srcpos = ucv_function_srcpos(&function->header, off); - ucv_object_add(entry, "filename", ucv_string_new(function->source->filename)); - ucv_object_add(entry, "line", ucv_int64_new(uc_source_get_line(function->source, &srcpos))); + ucv_object_add(entry, "filename", ucv_string_new(source->filename)); + ucv_object_add(entry, "line", ucv_int64_new(uc_source_get_line(source, &srcpos))); ucv_object_add(entry, "byte", ucv_int64_new(srcpos)); } @@ -881,7 +886,7 @@ uc_vm_get_error_context(uc_vm_t *vm) buf = ucv_stringbuf_new(); if (offset) - uc_error_context_format(buf, frame->closure->function->source, stacktrace, offset); + uc_error_context_format(buf, uc_vm_frame_source(frame), stacktrace, offset); else if (frame->ip != chunk->entries) ucv_stringbuf_printf(buf, "At instruction %zu", (frame->ip - chunk->entries) - 1); else @@ -915,7 +920,7 @@ uc_vm_insn_load(uc_vm_t *vm, uc_vm_insn_t insn) { switch (insn) { case I_LOAD: - uc_vm_stack_push(vm, uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32)); + uc_vm_stack_push(vm, 
uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32)); break; case I_LOAD8: @@ -938,7 +943,7 @@ uc_vm_insn_load(uc_vm_t *vm, uc_vm_insn_t insn) static void uc_vm_insn_load_regexp(uc_vm_t *vm, uc_vm_insn_t insn) { - uc_value_t *re, *jstr = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + uc_value_t *re, *jstr = uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32); bool icase = false, newline = false, global = false; char *str, *err = NULL; @@ -987,7 +992,7 @@ uc_vm_insn_load_var(uc_vm_t *vm, uc_vm_insn_t insn) bool found; scope = vm->globals; - name = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + name = uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32); while (ucv_type(name) == UC_STRING) { val = ucv_object_get(scope, ucv_string_get(name), &found); @@ -1128,7 +1133,7 @@ static void uc_vm_insn_load_closure(uc_vm_t *vm, uc_vm_insn_t insn) { uc_callframe_t *frame = uc_vm_current_frame(vm); - uc_value_t *fno = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + uc_value_t *fno = uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32); uc_function_t *function = (uc_function_t *)fno; uc_closure_t *closure = (uc_closure_t *)ucv_closure_new(vm, function, insn == I_ARFN); volatile int32_t uv; @@ -1163,7 +1168,7 @@ uc_vm_insn_store_var(uc_vm_t *vm, uc_vm_insn_t insn) bool found; scope = vm->globals; - name = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + name = uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32); while (ucv_type(name) == UC_STRING) { ucv_object_get(scope, ucv_string_get(name), &found); @@ -1562,7 +1567,7 @@ uc_vm_insn_update_var(uc_vm_t *vm, uc_vm_insn_t insn) bool found; scope = vm->globals; - name = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32 & 0x00FFFFFF); + name = uc_program_get_constant(uc_vm_current_program(vm), vm->arg.u32 & 0x00FFFFFF); assert(ucv_type(name) == UC_STRING); @@ -2565,7 +2570,7 @@ uc_vm_execute(uc_vm_t *vm, 
uc_function_t *fn, uc_value_t **retval) if (vm->trace) { buf = xprintbuf_new(); - uc_source_context_format(buf, fn->source, 0, true); + uc_source_context_format(buf, uc_vm_frame_source(frame), 0, true); fwrite(buf->buf, 1, printbuf_length(buf), stderr); printbuf_free(buf); @@ -2602,6 +2607,8 @@ uc_vm_execute(uc_vm_t *vm, uc_function_t *fn, uc_value_t **retval) break; } + ucv_put(&fn->header); + return status; } |