diff options
author | Jo-Philipp Wich <jo@mein.io> | 2022-07-30 14:02:03 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-30 14:02:03 +0200 |
commit | e55965a3d170f60776ffa2d82b2711d9ea3a0211 (patch) | |
tree | b73977b8e71445de9e5947d2db3bf941cc174f42 | |
parent | 1219d7efa170bf38fb1bf6a10fa0d1f96e62f091 (diff) | |
parent | 156d584e4d0af46c39234ee68a98a16ab4cbe225 (diff) |
Merge pull request #96 from jow-/module-import-export-support
Module import export support
-rw-r--r-- | compiler.c | 598 | ||||
-rw-r--r-- | include/ucode/chunk.h | 14 | ||||
-rw-r--r-- | include/ucode/lexer.h | 40 | ||||
-rw-r--r-- | include/ucode/lib.h | 4 | ||||
-rw-r--r-- | include/ucode/program.h | 20 | ||||
-rw-r--r-- | include/ucode/source.h | 13 | ||||
-rw-r--r-- | include/ucode/types.h | 65 | ||||
-rw-r--r-- | include/ucode/util.h | 5 | ||||
-rw-r--r-- | include/ucode/vallist.h | 14 | ||||
-rw-r--r-- | include/ucode/vm.h | 4 | ||||
-rw-r--r-- | lexer.c | 1468 | ||||
-rw-r--r-- | lib.c | 38 | ||||
-rw-r--r-- | main.c | 60 | ||||
-rw-r--r-- | program.c | 199 | ||||
-rw-r--r-- | source.c | 25 | ||||
-rw-r--r-- | tests/custom/04_modules/01_export_variable_declaration | 29 | ||||
-rw-r--r-- | tests/custom/04_modules/02_export_function_declaration | 22 | ||||
-rw-r--r-- | tests/custom/04_modules/03_export_list | 27 | ||||
-rw-r--r-- | tests/custom/04_modules/04_export_rename | 28 | ||||
-rw-r--r-- | tests/custom/04_modules/05_export_default | 38 | ||||
-rw-r--r-- | tests/custom/04_modules/06_export_errors | 89 | ||||
-rw-r--r-- | tests/custom/04_modules/07_import_default | 99 | ||||
-rw-r--r-- | tests/custom/04_modules/08_import_list | 105 | ||||
-rw-r--r-- | tests/custom/04_modules/09_import_wildcard | 73 | ||||
-rw-r--r-- | tests/custom/04_modules/10_import_none | 18 | ||||
-rw-r--r-- | tests/custom/04_modules/11_import_many_exec_once | 28 | ||||
-rw-r--r-- | tests/custom/04_modules/12_import_immutability | 52 | ||||
-rw-r--r-- | tests/custom/04_modules/13_import_liveness | 29 | ||||
-rw-r--r-- | tests/custom/99_bugs/01_try_catch_stack_mismatch (renamed from tests/custom/04_bugs/01_try_catch_stack_mismatch) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/02_array_pop_use_after_free (renamed from tests/custom/04_bugs/02_array_pop_use_after_free) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/03_switch_fallthrough_miscompilation (renamed from tests/custom/04_bugs/03_switch_fallthrough_miscompilation) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/04_property_set_abort (renamed from tests/custom/04_bugs/04_property_set_abort) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/05_duplicate_resource_type (renamed from tests/custom/04_bugs/05_duplicate_resource_type) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/06_lexer_escape_at_boundary (renamed from tests/custom/04_bugs/06_lexer_escape_at_boundary) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/07_lexer_overlong_lines (renamed from tests/custom/04_bugs/07_lexer_overlong_lines) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/08_compiler_arrow_fn_expressions (renamed from tests/custom/04_bugs/08_compiler_arrow_fn_expressions) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/09_reject_invalid_array_indexes (renamed from tests/custom/04_bugs/09_reject_invalid_array_indexes) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/10_break_stack_mismatch (renamed from tests/custom/04_bugs/10_break_stack_mismatch) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/11_switch_stack_mismatch (renamed from tests/custom/04_bugs/11_switch_stack_mismatch) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/12_altblock_stack_mismatch (renamed from tests/custom/04_bugs/12_altblock_stack_mismatch) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/13_split_by_string_leading_trailing (renamed from tests/custom/04_bugs/13_split_by_string_leading_trailing) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/14_incomplete_expression_at_eof (renamed from tests/custom/04_bugs/14_incomplete_expression_at_eof) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/15_segfault_on_prefix_increment (renamed from tests/custom/04_bugs/15_segfault_on_prefix_increment) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/16_hang_on_regexp_at_eof (renamed from tests/custom/04_bugs/16_hang_on_regexp_at_eof) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/17_hang_on_unclosed_expression_block (renamed from tests/custom/04_bugs/17_hang_on_unclosed_expression_block) | 2 | ||||
-rw-r--r-- | tests/custom/99_bugs/18_hang_on_line_comments_at_eof (renamed from tests/custom/04_bugs/18_hang_on_line_comments_at_eof) | 2 | ||||
-rw-r--r-- | tests/custom/99_bugs/19_truncated_format_string (renamed from tests/custom/04_bugs/19_truncated_format_string) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/20_use_strict_stack_mismatch (renamed from tests/custom/04_bugs/20_use_strict_stack_mismatch) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/21_compiler_parenthesized_prop_keyword (renamed from tests/custom/04_bugs/21_compiler_parenthesized_prop_keyword) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/22_compiler_break_continue_scoping (renamed from tests/custom/04_bugs/22_compiler_break_continue_scoping) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/23_compiler_parenthesized_division (renamed from tests/custom/04_bugs/23_compiler_parenthesized_division) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/24_compiler_local_for_loop_declaration (renamed from tests/custom/04_bugs/24_compiler_local_for_loop_declaration) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/25_lexer_shifted_offsets (renamed from tests/custom/04_bugs/25_lexer_shifted_offsets) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/26_compiler_jmp_to_zero (renamed from tests/custom/04_bugs/26_compiler_jmp_to_zero) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/27_invalid_sparse_array_set (renamed from tests/custom/04_bugs/27_invalid_sparse_array_set) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/28_null_equality (renamed from tests/custom/04_bugs/28_null_equality) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/29_empty_string_as_number (renamed from tests/custom/04_bugs/29_empty_string_as_number) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/30_nan_strict_equality (renamed from tests/custom/04_bugs/30_nan_strict_equality) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/31_vallist_8bit_shortstrings (renamed from tests/custom/04_bugs/31_vallist_8bit_shortstrings) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/32_compiler_switch_patchlist_corruption (renamed from tests/custom/04_bugs/32_compiler_switch_patchlist_corruption) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/33_vm_computed_prop_decl_crash (renamed from tests/custom/04_bugs/33_vm_computed_prop_decl_crash) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/34_dirname_off_by_one (renamed from tests/custom/04_bugs/34_dirname_off_by_one) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/35_vm_callframe_double_free (renamed from tests/custom/04_bugs/35_vm_callframe_double_free) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/36_vm_nested_call_return (renamed from tests/custom/04_bugs/36_vm_nested_call_return) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/37_compiler_unexpected_unary_op (renamed from tests/custom/04_bugs/37_compiler_unexpected_unary_op) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/38_index_segfault (renamed from tests/custom/04_bugs/38_index_segfault) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/39_compiler_switch_continue_mismatch (renamed from tests/custom/04_bugs/39_compiler_switch_continue_mismatch) | 0 | ||||
-rw-r--r-- | tests/custom/99_bugs/40_lexer_bug_on_lstrip_off (renamed from tests/custom/04_bugs/40_lexer_bug_on_lstrip_off) | 0 | ||||
-rwxr-xr-x | tests/custom/run_tests.sh | 2 | ||||
-rw-r--r-- | types.c | 64 | ||||
-rw-r--r-- | vm.c | 163 |
71 files changed, 2462 insertions, 975 deletions
@@ -36,7 +36,7 @@ static void uc_compiler_compile_constant(uc_compiler_t *compiler); static void uc_compiler_compile_template(uc_compiler_t *compiler); static void uc_compiler_compile_comma(uc_compiler_t *compiler); static void uc_compiler_compile_labelexpr(uc_compiler_t *compiler); -static void uc_compiler_compile_function(uc_compiler_t *compiler); +static void uc_compiler_compile_funcexpr(uc_compiler_t *compiler); static void uc_compiler_compile_and(uc_compiler_t *compiler); static void uc_compiler_compile_or(uc_compiler_t *compiler); static void uc_compiler_compile_nullish(uc_compiler_t *compiler); @@ -76,7 +76,7 @@ uc_compiler_parse_rules[TK_ERROR + 1] = { [TK_TEMPLATE] = { uc_compiler_compile_template, NULL, P_NONE }, [TK_COMMA] = { NULL, uc_compiler_compile_comma, P_COMMA }, [TK_LABEL] = { uc_compiler_compile_labelexpr, NULL, P_NONE }, - [TK_FUNC] = { uc_compiler_compile_function, NULL, P_NONE }, + [TK_FUNC] = { uc_compiler_compile_funcexpr, NULL, P_NONE }, [TK_AND] = { NULL, uc_compiler_compile_and, P_AND }, [TK_OR] = { NULL, uc_compiler_compile_or, P_OR }, [TK_NULLISH] = { NULL, uc_compiler_compile_nullish, P_OR }, @@ -121,7 +121,7 @@ uc_compiler_exprstack_is(uc_compiler_t *compiler, uc_exprflag_t flag) } static void -uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_program_t *program, bool strict) +uc_compiler_init(uc_compiler_t *compiler, const char *name, uc_source_t *source, size_t srcpos, uc_program_t *program, bool strict) { uc_value_t *varname = ucv_string_new("(callee)"); uc_function_t *fn; @@ -129,7 +129,7 @@ uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_pr compiler->scope_depth = 0; compiler->program = program; - compiler->function = uc_program_function_new(program, name, srcpos); + compiler->function = uc_program_function_new(program, name, source, srcpos); compiler->locals.count = 0; compiler->locals.entries = NULL; @@ -163,7 +163,7 @@ uc_compiler_current_chunk(uc_compiler_t *compiler) static uc_source_t * uc_compiler_current_source(uc_compiler_t *compiler) { - return compiler->program->source; + return uc_program_function_source(compiler->function); } __attribute__((format(printf, 3, 0))) static void @@ -497,12 +497,23 @@ uc_compiler_set_u32(uc_compiler_t *compiler, size_t off, uint32_t n) } static size_t -uc_compiler_emit_constant(uc_compiler_t *compiler, size_t srcpos, uc_value_t *val) +uc_compiler_emit_constant_index(uc_compiler_t *compiler, size_t srcpos, uc_value_t *val) { size_t cidx = uc_program_add_constant(compiler->program, val); + uc_compiler_emit_u32(compiler, srcpos, cidx); + + return cidx; +} + +static size_t +uc_compiler_emit_constant(uc_compiler_t *compiler, size_t srcpos, uc_value_t *val) +{ + size_t cidx; + uc_compiler_emit_insn(compiler, srcpos, I_LOAD); - uc_compiler_emit_u32(compiler, 0, cidx); + + cidx = uc_compiler_emit_constant_index(compiler, srcpos, val); return cidx; } @@ -790,10 +801,22 @@ uc_compiler_add_upval(uc_compiler_t *compiler, size_t idx, bool local, uc_value_ static ssize_t uc_compiler_resolve_upval(uc_compiler_t *compiler, uc_value_t *name, bool *constant) { + uc_upvals_t *upvals = &compiler->upvals; + uc_upval_t *uv; ssize_t idx; + size_t i; + + if (!compiler->parent) { + for (i = 0, uv = upvals->entries; i < upvals->count; i++, uv = upvals->entries + i) { + if (ucv_is_equal(uv->name, name) && uv->local == false) { + *constant = uv->constant; + + return i; + } + } - if (!compiler->parent) return -1; + } idx = uc_compiler_resolve_local(compiler->parent, name, constant); @@ -871,7 +894,7 @@ uc_compiler_emit_inc_dec(uc_compiler_t *compiler, uc_tokentype_t toktype, bool i varname = compiler->upvals.entries[cidx].name; if (varname) - uc_compiler_syntax_error(compiler, 0, + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, "Invalid increment/decrement of constant '%s'", ucv_string_get(varname)); @@ -1110,7 +1133,8 @@ uc_compiler_compile_arrowfn(uc_compiler_t *compiler, uc_value_t *args, bool rest pos = compiler->parser->prev.pos; - uc_compiler_init(&fncompiler, NULL, compiler->parser->prev.pos, + uc_compiler_init(&fncompiler, NULL, uc_compiler_current_source(compiler), + compiler->parser->prev.pos, compiler->program, uc_compiler_is_strict(compiler)); @@ -1536,7 +1560,7 @@ uc_compiler_compile_delimitted_block(uc_compiler_t *compiler, uc_tokentype_t end } static void -uc_compiler_compile_function(uc_compiler_t *compiler) +uc_compiler_compile_funcexpr_common(uc_compiler_t *compiler, bool require_name) { uc_compiler_t fncompiler = { 0 }; uc_value_t *name = NULL; @@ -1559,9 +1583,14 @@ uc_compiler_compile_function(uc_compiler_t *compiler) if (slot == -1) uc_compiler_initialize_local(compiler); } + else if (require_name) { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, "Expecting function name"); + } uc_compiler_init(&fncompiler, - name ? ucv_string_get(name) : NULL, compiler->parser->prev.pos, + name ? ucv_string_get(name) : NULL, + uc_compiler_current_source(compiler), + compiler->parser->prev.pos, compiler->program, uc_compiler_is_strict(compiler)); @@ -1644,6 +1673,18 @@ uc_compiler_compile_function(uc_compiler_t *compiler) } static void +uc_compiler_compile_funcexpr(uc_compiler_t *compiler) +{ + return uc_compiler_compile_funcexpr_common(compiler, false); +} + +static void +uc_compiler_compile_funcdecl(uc_compiler_t *compiler) +{ + return uc_compiler_compile_funcexpr_common(compiler, true); +} + +static void uc_compiler_compile_and(uc_compiler_t *compiler) { uc_chunk_t *chunk = uc_compiler_current_chunk(compiler); @@ -2874,7 +2915,7 @@ uc_compiler_compile_statement(uc_compiler_t *compiler) else if (uc_compiler_parse_match(compiler, TK_TRY)) uc_compiler_compile_try(compiler); else if (uc_compiler_parse_match(compiler, TK_FUNC)) - uc_compiler_compile_function(compiler); + uc_compiler_compile_funcdecl(compiler); else if (uc_compiler_parse_match(compiler, TK_BREAK)) uc_compiler_compile_control(compiler); else if (uc_compiler_parse_match(compiler, TK_CONTINUE)) @@ -2894,13 +2935,516 @@ uc_compiler_compile_statement(uc_compiler_t *compiler) } static void -uc_compiler_compile_declaration(uc_compiler_t *compiler) +uc_compiler_export_add(uc_compiler_t *compiler, uc_value_t *name, ssize_t slot) +{ + uc_source_t *source = uc_compiler_current_source(compiler); + + if (!uc_source_export_add(source, name)) { + if (name) + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Duplicate export '%s' for module '%s'", ucv_string_get(name), source->filename); + else + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Duplicate default export for module '%s'", source->filename); + } + else { + uc_compiler_emit_insn(compiler, 0, I_EXPORT); + uc_compiler_emit_u32(compiler, 0, slot); + } +} + +static void +uc_compiler_compile_exportlist(uc_compiler_t *compiler) +{ + uc_value_t *label, *name; + bool constant; + ssize_t slot; + + /* parse export symbols */ + do { + uc_compiler_parse_consume(compiler, TK_LABEL); + + label = ucv_get(compiler->parser->prev.uv); + name = NULL; + + slot = uc_compiler_resolve_local(compiler, label, &constant); + + if (slot == -1) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Attempt to export undeclared or non-local variable '%s'", + ucv_string_get(label)); + } + + if (uc_compiler_parse_match(compiler, TK_AS)) { + if (uc_compiler_parse_match(compiler, TK_LABEL) || uc_compiler_parse_match(compiler, TK_STRING)) { + name = ucv_get(compiler->parser->prev.uv); + } + else if (!uc_compiler_parse_match(compiler, TK_DEFAULT)) { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unexpected token\nExpecting Label, String or 'default'"); + } + } + else { + name = ucv_get(label); + } + + uc_compiler_export_add(compiler, name, slot); + + ucv_put(label); + ucv_put(name); + + if (uc_compiler_parse_match(compiler, TK_RBRACE)) + break; + } + while (uc_compiler_parse_match(compiler, TK_COMMA)); + + uc_compiler_parse_consume(compiler, TK_SCOL); +} + +static void +uc_compiler_compile_export(uc_compiler_t *compiler) +{ + uc_locals_t *locals = &compiler->locals; + size_t off = locals->count; + uc_value_t *name; + ssize_t slot; + + if (compiler->program->sources.count == 1 || compiler->scope_depth) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Exports may only appear at top level of a module"); + + return; + } + + if (uc_compiler_parse_match(compiler, TK_LBRACE)) { + uc_compiler_compile_exportlist(compiler); + + return; + } + + if (uc_compiler_parse_match(compiler, TK_LOCAL)) + uc_compiler_compile_declexpr(compiler, false); + else if (uc_compiler_parse_match(compiler, TK_CONST)) + uc_compiler_compile_declexpr(compiler, true); + else if (uc_compiler_parse_match(compiler, TK_FUNC)) + uc_compiler_compile_funcdecl(compiler); + else if (uc_compiler_parse_match(compiler, TK_DEFAULT)) + uc_compiler_compile_expression(compiler); + else + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unexpected token\nExpecting 'let', 'const', 'function', 'default' or '{'"); + + if (off == locals->count) { + name = ucv_string_new("(module default export)"); + slot = uc_compiler_declare_local(compiler, name, true); + ucv_put(name); + + if (slot != -1) + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Duplicate default export statement"); + else + uc_compiler_export_add(compiler, NULL, compiler->locals.count - 1); + } + else { + for (; off < locals->count; off++) + uc_compiler_export_add(compiler, locals->entries[off].name, off); + } + + uc_compiler_parse_consume(compiler, TK_SCOL); +} + +static uc_program_t * +uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, uc_program_t *prog, char **errp); + +static bool +uc_compiler_compile_module_source(uc_compiler_t *compiler, uc_source_t *source, uc_value_t *imports, char **errp) +{ + uc_parse_config_t config = { + .raw_mode = true, + .strict_declarations = true, + .module_search_path = compiler->parser->lex.config->module_search_path + }; + + size_t i, load_idx = 0, n_imports = 0; + bool loaded = false; + uc_value_t *import; + ssize_t slot; + + uc_program_function_foreach(compiler->program, fn) { + if (uc_program_function_source(fn) == source) { + loaded = true; + break; + } + } + + if (!loaded) { + load_idx = uc_program_function_id(compiler->program, + uc_program_function_last(compiler->program)) + 1; + + if (!uc_compile_from_source(&config, source, compiler->program, errp)) + return false; + + /* emit load, call & pop instructions */ + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_CLFN); + uc_compiler_emit_u32(compiler, 0, load_idx); + + uc_compiler_emit_insn(compiler, 0, I_CALL); + uc_compiler_emit_u32(compiler, 0, 0); + + uc_compiler_emit_insn(compiler, 0, I_POP); + } + + /* count imports, handle wildcard imports */ + for (i = 0; i < ucv_array_length(imports); i++) { + if (ucv_boolean_get(ucv_array_get(imports, i))) { + /* find index of first module export */ + slot = uc_program_export_lookup(compiler->program, source, source->exports.entries[0]); + + if (slot > 0xffff || source->exports.count > 0xffff) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Too many module exports"); + } + + /* emit import instruction... */ + uc_compiler_emit_insn(compiler, 0, I_IMPORT); + uc_compiler_emit_u32(compiler, 0, source->exports.count | (0xffff << 16)); + + /* ... followed by first module export offset ... */ + uc_compiler_emit_u16(compiler, 0, slot); + + /* ... and constant indexes for all exported names */ + for (load_idx = 0; load_idx < source->exports.count; load_idx++) { + if (source->exports.entries[load_idx]) + import = ucv_get(source->exports.entries[load_idx]); + else + import = ucv_string_new("default"); + + uc_compiler_emit_constant_index(compiler, 0, import); + ucv_put(import); + } + + } + else { + n_imports++; + } + } + + /* 0xffff is reserved for wildcard import */ + if (n_imports > 0xfffe) + uc_compiler_syntax_error(compiler, 0, "Too many imports"); + + /* emit non-wilcard import instructions */ + for (i = 0; i < ucv_array_length(imports); i++) { + import = ucv_array_get(imports, i); + + if (!ucv_boolean_get(import)) { + slot = uc_program_export_lookup(compiler->program, source, import); + + if (slot == -1) { + if (import) + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Module %s does not export '%s'", source->filename, ucv_string_get(import)); + else + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Module %s has no default export", source->filename); + } + else if (slot > 0xffff) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Too many module exports"); + } + else { + uc_compiler_emit_insn(compiler, 0, I_IMPORT); + uc_compiler_emit_u32(compiler, 0, slot | ((compiler->upvals.count - n_imports + i) << 16)); + } + } + } + + return true; +} + +static char * +uc_compiler_canonicalize_path(const char *path, const char *basedir) { + char *p, *resolved; + if (*path == '/') + xasprintf(&p, "%s", path); + else if (basedir) + xasprintf(&p, "%s/%s", basedir, path); + else + xasprintf(&p, "./%s", path); + + resolved = realpath(p, NULL); + + free(p); + + return resolved; +} + +static char * +uc_compiler_expand_module_path(const char *name, const char *basedir, const char *template) +{ + int namelen, prefixlen; + char *path, *p; + + p = strchr(template, '*'); + + if (!p) + return NULL; + + prefixlen = p - template; + namelen = strlen(name); + + xasprintf(&path, "%.*s%.*s%s", prefixlen, template, namelen, name, p + 1); + + for (p = path + prefixlen; namelen > 0; namelen--, p++) + if (*p == '.') + *p = '/'; + + p = uc_compiler_canonicalize_path(path, basedir); + + free(path); + + return p; +} + +static char * +uc_compiler_resolve_module_path(uc_compiler_t *compiler, const char *name) +{ + uc_search_path_t *search = &compiler->parser->lex.config->module_search_path; + uc_source_t *source = uc_compiler_current_source(compiler); + char *path = NULL; + size_t i; + + if (strchr(name, '/')) + return uc_compiler_canonicalize_path(name, source->runpath); + + for (i = 0; i < search->count && !path; i++) + path = uc_compiler_expand_module_path(name, source->runpath, search->entries[i]); + + return path; +} + +static uc_source_t * +uc_compiler_acquire_source(uc_compiler_t *compiler, const char *path) +{ + size_t i; + + for (i = 0; i < compiler->program->sources.count; i++) + if (!strcmp(compiler->program->sources.entries[i]->filename, path)) + return uc_source_get(compiler->program->sources.entries[i]); + + return uc_source_new_file(path); +} + +static bool +uc_compiler_compile_module(uc_compiler_t *compiler, const char *name, uc_value_t *imports) +{ + uc_source_t *source; + char *path, *err; + bool res; + + if (!name) + return false; + + path = uc_compiler_resolve_module_path(compiler, name); + + if (path) { + source = uc_compiler_acquire_source(compiler, path); + + if (source) { + err = NULL; + res = uc_compiler_compile_module_source(compiler, source, imports, &err); + + if (!res) + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unable to compile module '%s':\n%s", source->filename, err); + + free(err); + } + else { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unable to open module '%s': %s", + path, strerror(errno)); + + res = false; + } + } + else { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unable to resolve path for module '%s'", name); + + return false; + } + + uc_source_put(source); + free(path); + + return res; +} + +static void +uc_compiler_import_add(uc_compiler_t *compiler, uc_value_t *name) +{ + bool constant; + ssize_t slot; + + slot = uc_compiler_resolve_local(compiler, name, &constant); + + if (slot != -1) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Import name '%s' is already declared as local variable", + ucv_string_get(name)); + + return; + } + + slot = uc_compiler_resolve_upval(compiler, name, &constant); + + if (slot != -1) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Import name '%s' is already used", + ucv_string_get(name)); + + return; + } + + uc_compiler_add_upval(compiler, (2 << 14) + compiler->upvals.count, false, name, true); +} + +static void +uc_compiler_compile_importlist(uc_compiler_t *compiler, uc_value_t *namelist) +{ + uc_value_t *label, *name; + + /* parse export symbols */ + do { + name = NULL; + label = NULL; + + if (uc_compiler_parse_match(compiler, TK_DEFAULT)) { + uc_compiler_parse_consume(compiler, TK_AS); + uc_compiler_parse_consume(compiler, TK_LABEL); + + label = ucv_get(compiler->parser->prev.uv); + } + else if (uc_compiler_parse_match(compiler, TK_STRING)) { + name = ucv_get(compiler->parser->prev.uv); + + uc_compiler_parse_consume(compiler, TK_AS); + uc_compiler_parse_consume(compiler, TK_LABEL); + + label = ucv_get(compiler->parser->prev.uv); + } + else if (uc_compiler_parse_match(compiler, TK_LABEL)) { + name = ucv_get(compiler->parser->prev.uv); + + if (uc_compiler_parse_match(compiler, TK_AS)) { + uc_compiler_parse_consume(compiler, TK_LABEL); + + label = ucv_get(compiler->parser->prev.uv); + } + else { + label = ucv_get(name); + } + } + else { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unexpected token\nExpecting Label, String or 'default'"); + } + + uc_compiler_import_add(compiler, label); + ucv_array_push(namelist, name); + ucv_put(label); + + if (uc_compiler_parse_match(compiler, TK_RBRACE)) + break; + } + while (uc_compiler_parse_match(compiler, TK_COMMA)); +} + +static void +uc_compiler_compile_import(uc_compiler_t *compiler) +{ + uc_value_t *namelist = ucv_array_new(NULL); + + if (compiler->scope_depth) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Imports may only appear at top level"); + + return; + } + + /* import { ... } from */ + if (uc_compiler_parse_match(compiler, TK_LBRACE)) { + uc_compiler_compile_importlist(compiler, namelist); + uc_compiler_parse_consume(compiler, TK_FROM); + } + + /* import * as name from */ + else if (uc_compiler_parse_match(compiler, TK_MUL)) { + uc_compiler_parse_consume(compiler, TK_AS); + uc_compiler_parse_consume(compiler, TK_LABEL); + + uc_compiler_declare_local(compiler, compiler->parser->prev.uv, true); + uc_compiler_initialize_local(compiler); + ucv_array_push(namelist, ucv_boolean_new(true)); + + uc_compiler_parse_consume(compiler, TK_FROM); + } + + /* import defaultExport [, ... ] from */ + else if (uc_compiler_parse_match(compiler, TK_LABEL)) { + uc_compiler_import_add(compiler, compiler->parser->prev.uv); + ucv_array_push(namelist, NULL); + + /* import defaultExport, ... from */ + if (uc_compiler_parse_match(compiler, TK_COMMA)) { + /* import defaultExport, { ... } from */ + if (uc_compiler_parse_match(compiler, TK_LBRACE)) { + uc_compiler_compile_importlist(compiler, namelist); + } + + /* import defaultExport, * as name from */ + else if (uc_compiler_parse_match(compiler, TK_MUL)) { + uc_compiler_parse_consume(compiler, TK_AS); + uc_compiler_parse_consume(compiler, TK_LABEL); + + uc_compiler_declare_local(compiler, compiler->parser->prev.uv, true); + uc_compiler_initialize_local(compiler); + ucv_array_push(namelist, ucv_boolean_new(true)); + } + + /* error */ + else { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unexpected token\nExpecting '{' or '*'"); + } + } + + uc_compiler_parse_consume(compiler, TK_FROM); + } + + uc_compiler_parse_consume(compiler, TK_STRING); + + uc_compiler_compile_module(compiler, ucv_string_get(compiler->parser->prev.uv), namelist); + + uc_compiler_parse_consume(compiler, TK_SCOL); + + ucv_put(namelist); +} + +static void +uc_compiler_compile_declaration(uc_compiler_t *compiler) +{ if (uc_compiler_parse_match(compiler, TK_LOCAL)) uc_compiler_compile_local(compiler); else if (uc_compiler_parse_match(compiler, TK_CONST)) uc_compiler_compile_const(compiler); + else if (uc_compiler_parse_match(compiler, TK_EXPORT)) + uc_compiler_compile_export(compiler); + else if (uc_compiler_parse_match(compiler, TK_IMPORT)) + uc_compiler_compile_import(compiler); else uc_compiler_compile_statement(compiler); @@ -2912,7 +3456,7 @@ uc_compiler_compile_declaration(uc_compiler_t *compiler) static uc_program_t * -uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, char **errp) +uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, uc_program_t *prog, char **errp) { #ifdef NO_COMPILE if (errp) @@ -2923,13 +3467,21 @@ uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, char **er uc_exprstack_t expr = { .token = TK_EOF }; uc_parser_t parser = { .config = config }; uc_compiler_t compiler = { .parser = &parser, .exprstack = &expr }; - uc_program_t *prog; + uc_program_t *progptr; uc_function_t *fn; + const char *name; - prog = uc_program_new(source); + if (!prog) { + progptr = uc_program_new(); + name = "main"; + } + else { + progptr = prog; + name = "module"; + } uc_lexer_init(&parser.lex, config, source); - uc_compiler_init(&compiler, "main", 0, prog, + uc_compiler_init(&compiler, name, source, 0, progptr, config && config->strict_declarations); uc_compiler_parse_advance(&compiler); @@ -2950,12 +3502,13 @@ uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, char **er uc_lexer_free(&parser.lex); if (!fn) { - ucv_put(&prog->header); + if (progptr != prog) + ucv_put(&progptr->header); return NULL; } - return prog; + return progptr; #endif } @@ -2981,9 +3534,12 @@ uc_compile(uc_parse_config_t *config, uc_source_t *source, char **errp) { uc_program_t *prog = NULL; + if (!config) + config = &uc_default_parse_config; + switch (uc_source_type_test(source)) { case UC_SOURCE_TYPE_PLAIN: - prog = uc_compile_from_source(config, source, errp); + prog = uc_compile_from_source(config, source, NULL, errp); break; case UC_SOURCE_TYPE_PRECOMPILED: diff --git a/include/ucode/chunk.h b/include/ucode/chunk.h index 78d5ec6..a5f0b1c 100644 --- a/include/ucode/chunk.h +++ b/include/ucode/chunk.h @@ -24,14 +24,14 @@ #include "util.h" #include "types.h" -void uc_chunk_init(uc_chunk_t *chunk); -void uc_chunk_free(uc_chunk_t *chunk); -size_t uc_chunk_add(uc_chunk_t *chunk, uint8_t byte, size_t line); +__hidden void uc_chunk_init(uc_chunk_t *chunk); +__hidden void uc_chunk_free(uc_chunk_t *chunk); +__hidden size_t uc_chunk_add(uc_chunk_t *chunk, uint8_t byte, size_t line); -void uc_chunk_pop(uc_chunk_t *chunk); +__hidden void uc_chunk_pop(uc_chunk_t *chunk); -size_t uc_chunk_debug_get_srcpos(uc_chunk_t *chunk, size_t off); -void uc_chunk_debug_add_variable(uc_chunk_t *chunk, size_t from, size_t to, size_t slot, bool upval, uc_value_t *name); -uc_value_t *uc_chunk_debug_get_variable(uc_chunk_t *chunk, size_t off, size_t slot, bool upval); +__hidden size_t uc_chunk_debug_get_srcpos(uc_chunk_t *chunk, size_t off); +__hidden void uc_chunk_debug_add_variable(uc_chunk_t *chunk, size_t from, size_t to, size_t slot, bool upval, uc_value_t *name); +__hidden uc_value_t *uc_chunk_debug_get_variable(uc_chunk_t *chunk, size_t off, size_t slot, bool upval); #endif /* UCODE_CHUNK_H */ diff --git a/include/ucode/lexer.h b/include/ucode/lexer.h index 835bc2b..c013aac 100644 --- a/include/ucode/lexer.h +++ b/include/ucode/lexer.h @@ -117,6 +117,10 @@ typedef enum { TK_NULLISH, TK_PLACEH, TK_TEMPLATE, + TK_IMPORT, + TK_EXPORT, + TK_FROM, + TK_AS, TK_EOF, TK_ERROR @@ -124,14 +128,11 @@ typedef enum { typedef enum { UC_LEX_IDENTIFY_BLOCK, - UC_LEX_BLOCK_COMMENT_START, - UC_LEX_BLOCK_EXPRESSION_START, UC_LEX_BLOCK_EXPRESSION_EMIT_TAG, - UC_LEX_BLOCK_STATEMENT_START, UC_LEX_BLOCK_COMMENT, UC_LEX_IDENTIFY_TOKEN, - UC_LEX_PARSE_TOKEN, - UC_LEX_PLACEHOLDER, + UC_LEX_PLACEHOLDER_START, + UC_LEX_PLACEHOLDER_END, UC_LEX_EOF } uc_lex_state_t; @@ -145,19 +146,9 @@ typedef struct { uc_lex_state_t state; uc_parse_config_t *config; uc_source_t *source; - uint8_t eof:1; - uint8_t is_escape:1; - uint8_t is_placeholder:1; uint8_t no_regexp:1; uint8_t no_keyword:1; - size_t buflen; - char *buf, *bufstart, *bufend; - size_t lookbehindlen; - char *lookbehind; - const void *tok; uc_token_t curr; - char esc[5]; - uint8_t esclen; int lead_surrogate; size_t lastoff; enum { @@ -176,19 +167,24 @@ typedef struct { size_t count; size_t *entries; } templates; + struct { + size_t count; + char *entries; + } buffer; + unsigned char *rbuf; + size_t rlen, rpos; } uc_lexer_t; -void uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source); -void uc_lexer_free(uc_lexer_t *lex); +__hidden void uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source); +__hidden void uc_lexer_free(uc_lexer_t *lex); -uc_token_t *uc_lexer_next_token(uc_lexer_t *lex); +__hidden uc_token_t *uc_lexer_next_token(uc_lexer_t *lex); -bool uc_lexer_is_keyword(uc_value_t *label); +__hidden bool uc_lexer_is_keyword(uc_value_t *label); -bool utf8enc(char **out, int *rem, int code); +__hidden bool utf8enc(char **out, int *rem, int code); -const char * -uc_tokenname(unsigned type); +__hidden const char *uc_tokenname(unsigned type); #endif /* UCODE_LEXER_H */ diff --git a/include/ucode/lib.h b/include/ucode/lib.h index a80844d..4c7a3b0 100644 --- a/include/ucode/lib.h +++ b/include/ucode/lib.h @@ -31,8 +31,8 @@ extern const uc_function_list_t uc_stdlib_functions[]; void uc_stdlib_load(uc_value_t *scope); uc_cfn_ptr_t uc_stdlib_function(const char *name); -bool uc_source_context_format(uc_stringbuf_t *buf, uc_source_t *src, size_t off, bool compact); -bool uc_error_context_format(uc_stringbuf_t *buf, uc_source_t *src, uc_value_t *stacktrace, size_t off); +__hidden bool uc_source_context_format(uc_stringbuf_t *buf, uc_source_t *src, size_t off, bool compact); +__hidden bool uc_error_context_format(uc_stringbuf_t *buf, uc_source_t *src, uc_value_t *stacktrace, size_t off); /* vm helper */ diff --git a/include/ucode/program.h b/include/ucode/program.h index e8b96ed..9014ae4 100644 --- a/include/ucode/program.h +++ b/include/ucode/program.h @@ -20,7 +20,7 @@ #include "types.h" -uc_program_t *uc_program_new(uc_source_t *); +uc_program_t *uc_program_new(void); static inline uc_program_t * uc_program_get(uc_program_t *prog) { @@ -46,15 +46,19 @@ uc_program_put(uc_program_t *prog) { fn = fn##_tmp, \ fn##_tmp = (uc_function_t *)fn##_tmp->progref.prev) -uc_function_t *uc_program_function_new(uc_program_t *, const char *, size_t); -size_t uc_program_function_id(uc_program_t *, uc_function_t *); -uc_function_t *uc_program_function_load(uc_program_t *, size_t); -size_t uc_program_function_srcpos(uc_function_t *, size_t); -void uc_program_function_free(uc_function_t *); +#define uc_program_function_last(prog) (uc_function_t *)prog->functions.next +__hidden uc_function_t *uc_program_function_new(uc_program_t *, const char *, uc_source_t *, size_t); +__hidden size_t uc_program_function_id(uc_program_t *, uc_function_t *); +__hidden uc_function_t *uc_program_function_load(uc_program_t *, size_t); +__hidden uc_source_t *uc_program_function_source(uc_function_t *); +__hidden size_t uc_program_function_srcpos(uc_function_t *, size_t); +__hidden void uc_program_function_free(uc_function_t *); -uc_value_t *uc_program_get_constant(uc_program_t *, size_t); -ssize_t uc_program_add_constant(uc_program_t *, uc_value_t *); +__hidden ssize_t uc_program_export_lookup(uc_program_t *, uc_source_t *, uc_value_t *); + +__hidden uc_value_t *uc_program_get_constant(uc_program_t *, size_t); +__hidden ssize_t uc_program_add_constant(uc_program_t *, uc_value_t *); void uc_program_write(uc_program_t *, FILE *, bool); uc_program_t *uc_program_load(uc_source_t *, char **); diff --git a/include/ucode/source.h b/include/ucode/source.h index 6f9a8d7..e1fd211 100644 --- a/include/ucode/source.h +++ b/include/ucode/source.h @@ -35,7 +35,7 @@ typedef enum { uc_source_t *uc_source_new_file(const char *path); uc_source_t *uc_source_new_buffer(const char *name, char *buf, size_t len); -size_t uc_source_get_line(uc_source_t *source, size_t *offset); +__hidden size_t uc_source_get_line(uc_source_t *source, size_t *offset); static inline uc_source_t * uc_source_get(uc_source_t *source) { @@ -47,11 +47,14 @@ uc_source_put(uc_source_t *source) { ucv_put(source ? &source->header : NULL); } -uc_source_type_t uc_source_type_test(uc_source_t *source); +__hidden uc_source_type_t uc_source_type_test(uc_source_t *source); -void uc_source_line_next(uc_source_t *source); -void uc_source_line_update(uc_source_t *source, size_t off); +__hidden void uc_source_line_next(uc_source_t *source); +__hidden void uc_source_line_update(uc_source_t *source, size_t off); -void uc_source_runpath_set(uc_source_t *source, const char *runpath); +__hidden void uc_source_runpath_set(uc_source_t *source, const char *runpath); + +__hidden bool uc_source_export_add(uc_source_t *source, uc_value_t *name); +__hidden ssize_t uc_source_export_lookup(uc_source_t *source, uc_value_t *name); #endif /* UCODE_SOURCE_H */ diff --git a/include/ucode/types.h b/include/ucode/types.h index d1e01a1..0b63501 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -47,7 +47,7 @@ typedef enum uc_type { typedef struct uc_value { uint32_t type:4; uint32_t mark:1; - uint32_t u64:1; + uint32_t u64_or_constant:1; uint32_t refcount:26; } uc_value_t; @@ -65,6 +65,7 @@ typedef struct { /* Source buffer defintions */ uc_declare_vector(uc_lineinfo_t, uint8_t); +uc_declare_vector(uc_exports_t, uc_value_t *); typedef struct { uc_value_t header; @@ -72,6 +73,7 @@ typedef struct { FILE *fp; size_t off; uc_lineinfo_t lineinfo; + uc_exports_t exports; } uc_source_t; @@ -113,6 +115,7 @@ typedef struct uc_function { bool arrow, vararg, strict; size_t nargs; size_t nupvals; + size_t srcidx; size_t srcpos; uc_chunk_t chunk; struct uc_program *program; @@ -202,23 +205,45 @@ uc_declare_vector(uc_resource_types_t, uc_resource_type_t *); /* Program structure definitions */ +uc_declare_vector(uc_sources_t, uc_source_t *); + typedef struct uc_program { uc_value_t header; uc_value_list_t constants; uc_weakref_t functions; - uc_source_t *source; + uc_sources_t sources; } uc_program_t; /* Parser definitions */ +uc_declare_vector(uc_search_path_t, char *); + typedef struct { bool lstrip_blocks; bool trim_blocks; bool strict_declarations; bool raw_mode; + uc_search_path_t module_search_path; } uc_parse_config_t; +extern uc_parse_config_t uc_default_parse_config; + +void uc_search_path_init(uc_search_path_t *search_path); + +static inline void +uc_search_path_add(uc_search_path_t *search_path, char *path) { + uc_vector_push(search_path, xstrdup(path)); +} + +static inline void +uc_search_path_free(uc_search_path_t *search_path) { + while (search_path->count > 0) + free(search_path->entries[--search_path->count]); + + uc_vector_clear(search_path); +} + /* VM definitions */ @@ -249,6 +274,7 @@ typedef struct { uc_declare_vector(uc_callframes_t, uc_callframe_t); uc_declare_vector(uc_stack_t, uc_value_t *); +uc_declare_vector(uc_modexports_t, uc_upvalref_t *); typedef struct printbuf uc_stringbuf_t; @@ -265,6 +291,7 @@ struct uc_vm { uc_source_t *sources; uc_weakref_t values; uc_resource_types_t restypes; + uc_modexports_t exports; union { uint32_t u32; int32_t s32; @@ -283,13 +310,12 @@ struct uc_vm { /* Value API */ -void ucv_free(uc_value_t *, bool); -void ucv_put(uc_value_t *); - -void ucv_unref(uc_weakref_t *); -void ucv_ref(uc_weakref_t *, uc_weakref_t *); +__hidden void ucv_free(uc_value_t *, bool); +__hidden void ucv_unref(uc_weakref_t *); +__hidden void ucv_ref(uc_weakref_t *, uc_weakref_t *); uc_value_t *ucv_get(uc_value_t *uv); +void ucv_put(uc_value_t *); uc_type_t ucv_type(uc_value_t *); const char *ucv_typename(uc_value_t *); @@ -448,7 +474,28 @@ ucv_is_arrowfn(uc_value_t *uv) static inline bool ucv_is_u64(uc_value_t *uv) { - return (((uintptr_t)uv & 3) == 0 && uv != NULL && uv->u64 == true); + return (((uintptr_t)uv & 3) == 0 && uv != NULL && uv->u64_or_constant == true && + uv->type == UC_INTEGER); +} + +static inline bool +ucv_is_constant(uc_value_t *uv) +{ + return (((uintptr_t)uv & 3) == 0 && uv != NULL && uv->u64_or_constant == true && + (uv->type == UC_ARRAY || uv->type == UC_OBJECT)); +} + +static inline bool +ucv_set_constant(uc_value_t *uv, bool constant) +{ + if (((uintptr_t)uv & 3) == 0 && uv != NULL && uv->u64_or_constant != constant && + (uv->type == UC_ARRAY || uv->type == UC_OBJECT)) { + uv->u64_or_constant = constant; + + return true; + } + + return false; } static inline bool @@ -499,6 +546,6 @@ ucv_clear_mark(uc_value_t *uv) void ucv_gc(uc_vm_t *); -void ucv_freeall(uc_vm_t *); +__hidden void ucv_freeall(uc_vm_t *); #endif /* UCODE_TYPES_H */ diff --git a/include/ucode/util.h b/include/ucode/util.h index 093951e..52303cc 100644 --- a/include/ucode/util.h +++ b/include/ucode/util.h @@ -26,6 +26,11 @@ #include <json-c/json.h> +#ifndef __hidden +#define __hidden __attribute__((visibility("hidden"))) +#endif + + /* alignment & array size */ #ifndef ALIGN diff --git a/include/ucode/vallist.h b/include/ucode/vallist.h index 3dc5720..78b9a3e 100644 --- a/include/ucode/vallist.h +++ b/include/ucode/vallist.h @@ -33,17 +33,17 @@ typedef enum { TAG_LSTR = 5 } uc_value_type_t; -uc_value_t *uc_number_parse(const char *buf, char **end); -uc_value_t *uc_number_parse_octal(const char *buf, char **end); +__hidden uc_value_t *uc_number_parse(const char *buf, char **end); +__hidden uc_value_t *uc_number_parse_octal(const char *buf, char **end); bool uc_double_pack(double d, char *buf, bool little_endian); double uc_double_unpack(const char *buf, bool little_endian); -void uc_vallist_init(uc_value_list_t *list); -void uc_vallist_free(uc_value_list_t *list); +__hidden void uc_vallist_init(uc_value_list_t *list); +__hidden void uc_vallist_free(uc_value_list_t *list); -ssize_t uc_vallist_add(uc_value_list_t *list, uc_value_t *value); -uc_value_type_t uc_vallist_type(uc_value_list_t *list, size_t idx); -uc_value_t *uc_vallist_get(uc_value_list_t *list, size_t idx); +__hidden ssize_t uc_vallist_add(uc_value_list_t *list, uc_value_t *value); +__hidden uc_value_type_t uc_vallist_type(uc_value_list_t *list, size_t idx); +__hidden uc_value_t *uc_vallist_get(uc_value_list_t *list, size_t idx); #endif /* UCODE_VALUE_H */ diff --git a/include/ucode/vm.h b/include/ucode/vm.h index 8377446..cc57fdb 100644 --- a/include/ucode/vm.h +++ b/include/ucode/vm.h @@ -95,7 +95,9 @@ __insn(QMCALL) \ __insn(PRINT) \ __insn(NEXTK) \ __insn(NEXTKV) \ -__insn(DELETE) +__insn(DELETE) \ +__insn(IMPORT) \ +__insn(EXPORT) #undef __insn @@ -29,24 +29,12 @@ #include "ucode/lib.h" #include "ucode/lexer.h" -#define UC_LEX_CONTINUE_PARSING (void *)1 - struct keyword { unsigned type; const char *pat; unsigned plen; }; -struct token { - unsigned type; - union { - uint32_t patn; - char pat[4]; - } u; - unsigned plen; - uc_token_t *(*parse)(uc_lexer_t *); -}; - #define dec(o) \ ((o) - '0') @@ -56,94 +44,6 @@ struct token { #ifndef NO_COMPILE -static uc_token_t *parse_comment(uc_lexer_t *); -static uc_token_t *parse_string(uc_lexer_t *); -static uc_token_t *parse_regexp(uc_lexer_t *); -static uc_token_t *parse_number(uc_lexer_t *); -static uc_token_t *parse_label(uc_lexer_t *); - -static const struct token tokens[] = { - { TK_ASLEFT, { .pat = "<<=" }, 3, NULL }, - { TK_ASRIGHT, { .pat = ">>=" }, 3, NULL }, - { TK_LEXP, { .pat = "{{-" }, 3, NULL }, - { TK_REXP, { .pat = "-}}" }, 3, NULL }, - { TK_LSTM, { .pat = "{%+" }, 3, NULL }, - { TK_LSTM, { .pat = "{%-" }, 3, NULL }, - { TK_RSTM, { .pat = "-%}" }, 3, NULL }, - { TK_EQS, { .pat = "===" }, 3, NULL }, - { TK_NES, { .pat = "!==" }, 3, NULL }, - { TK_ELLIP, { .pat = "..." }, 3, NULL }, - { TK_QLBRACK, { .pat = "?.[" }, 3, NULL }, - { TK_QLPAREN, { .pat = "?.(" }, 3, NULL }, - { TK_ASEXP, { .pat = "**=" }, 3, NULL }, - { TK_ASAND, { .pat = "&&=" }, 3, NULL }, - { TK_ASOR, { .pat = "||=" }, 3, NULL }, - { TK_ASNULLISH, { .pat = "\?\?=" }, 3, NULL }, - { TK_AND, { .pat = "&&" }, 2, NULL }, - { TK_ASADD, { .pat = "+=" }, 2, NULL }, - { TK_ASBAND, { .pat = "&=" }, 2, NULL }, - { TK_ASBOR, { .pat = "|=" }, 2, NULL }, - { TK_ASBXOR, { .pat = "^=" }, 2, NULL }, - //{ TK_ASDIV, { .pat = "/=" }, 2, NULL }, - { TK_ASMOD, { .pat = "%=" }, 2, NULL }, - { TK_ASMUL, { .pat = "*=" }, 2, NULL }, - { TK_ASSUB, { .pat = "-=" }, 2, NULL }, - { TK_EXP, { .pat = "**" }, 2, NULL }, - { TK_DEC, { .pat = "--" }, 2, NULL }, - { TK_INC, { .pat = "++" }, 2, NULL }, - { TK_EQ, { .pat = "==" }, 2, NULL }, - { TK_NE, { .pat = "!=" }, 2, NULL }, - { TK_LE, { .pat = "<=" }, 2, NULL }, - { TK_GE, { .pat = ">=" }, 2, NULL }, - { TK_LSHIFT, { .pat = "<<" }, 2, NULL }, - { TK_RSHIFT, { .pat = ">>" }, 2, NULL }, - { 0, { .pat = "//" }, 2, parse_comment }, - { 0, { .pat = "/*" }, 2, parse_comment }, - { TK_OR, { .pat = "||" }, 2, NULL }, - { TK_LEXP, { .pat = "{{" }, 2, NULL }, - { TK_REXP, { .pat = "}}" }, 2, NULL }, - { TK_LSTM, { .pat = "{%" }, 2, NULL }, - { TK_RSTM, { .pat = "%}" }, 2, NULL }, - { TK_ARROW, { .pat = "=>" }, 2, NULL }, - { TK_NULLISH, { .pat = "??" }, 2, NULL }, - { TK_QDOT, { .pat = "?." }, 2, NULL }, - { TK_PLACEH, { .pat = "${" }, 2, NULL }, - { TK_ADD, { .pat = "+" }, 1, NULL }, - { TK_ASSIGN, { .pat = "=" }, 1, NULL }, - { TK_BAND, { .pat = "&" }, 1, NULL }, - { TK_BOR, { .pat = "|" }, 1, NULL }, - { TK_LBRACK, { .pat = "[" }, 1, NULL }, - { TK_RBRACK, { .pat = "]" }, 1, NULL }, - { TK_BXOR, { .pat = "^" }, 1, NULL }, - { TK_LBRACE, { .pat = "{" }, 1, NULL }, - { TK_RBRACE, { .pat = "}" }, 1, NULL }, - { TK_COLON, { .pat = ":" }, 1, NULL }, - { TK_COMMA, { .pat = "," }, 1, NULL }, - { TK_COMPL, { .pat = "~" }, 1, NULL }, - //{ TK_DIV, { .pat = "/" }, 1, NULL }, - { TK_GT, { .pat = ">" }, 1, NULL }, - { TK_NOT, { .pat = "!" }, 1, NULL }, - { TK_LT, { .pat = "<" }, 1, NULL }, - { TK_MOD, { .pat = "%" }, 1, NULL }, - { TK_MUL, { .pat = "*" }, 1, NULL }, - { TK_LPAREN, { .pat = "(" }, 1, NULL }, - { TK_RPAREN, { .pat = ")" }, 1, NULL }, - { TK_QMARK, { .pat = "?" }, 1, NULL }, - { TK_SCOL, { .pat = ";" }, 1, NULL }, - { TK_SUB, { .pat = "-" }, 1, NULL }, - { TK_DOT, { .pat = "." }, 1, NULL }, - { TK_STRING, { .pat = "'" }, 1, parse_string }, - { TK_STRING, { .pat = "\"" }, 1, parse_string }, - { TK_REGEXP, { .pat = "/" }, 1, parse_regexp }, - { TK_LABEL, { .pat = "_" }, 1, parse_label }, - { TK_LABEL, { .pat = "az" }, 0, parse_label }, - { TK_LABEL, { .pat = "AZ" }, 0, parse_label }, - { TK_NUMBER, { .pat = "09" }, 0, parse_number }, - - /* NB: this must be last for simple retrieval */ - { TK_TEMPLATE, { .pat = "`" }, 1, parse_string } -}; - static const struct keyword reserved_words[] = { { TK_ENDFUNC, "endfunction", 11 }, { TK_CONTINUE, "continue", 8 }, @@ -154,6 +54,8 @@ static const struct keyword reserved_words[] = { { TK_RETURN, "return", 6 }, { TK_ENDFOR, "endfor", 6 }, { TK_SWITCH, "switch", 6 }, + { TK_IMPORT, "import", 6 }, + { TK_EXPORT, "export", 6 }, { TK_ENDIF, "endif", 5 }, { TK_WHILE, "while", 5 }, { TK_BREAK, "break", 5 }, @@ -166,127 +68,128 @@ static const struct keyword reserved_words[] = { { TK_THIS, "this", 4 }, { TK_NULL, "null", 4 }, { TK_CASE, "case", 4 }, + { TK_FROM, "from", 4 }, { TK_TRY, "try", 3 }, { TK_FOR, "for", 3 }, { TK_LOCAL, "let", 3 }, { TK_IF, "if", 2 }, { TK_IN, "in", 2 }, + { TK_AS, "as", 2 }, }; -/* length of the longest token in our lookup table */ -#define UC_LEX_MAX_TOKEN_LEN 3 +static int +fill_buf(uc_lexer_t *lex) { + lex->rbuf = xrealloc(lex->rbuf, 128); + lex->rlen = fread(lex->rbuf, 1, 128, lex->source->fp); + lex->rpos = 0; -static uc_token_t * -emit_op(uc_lexer_t *lex, uint32_t pos, int type, uc_value_t *uv) -{ - lex->curr.type = type; - lex->curr.uv = uv; - lex->curr.pos = pos; + if (!lex->rlen) + return EOF; - return &lex->curr; -} + lex->rpos++; -static void lookbehind_append(uc_lexer_t *lex, const char *data, size_t len) -{ - if (len) { - lex->lookbehind = xrealloc(lex->lookbehind, lex->lookbehindlen + len); - memcpy(lex->lookbehind + lex->lookbehindlen, data, len); - lex->lookbehindlen += len; - } + return (int)lex->rbuf[0]; } -static void lookbehind_reset(uc_lexer_t *lex) { - free(lex->lookbehind); - lex->lookbehind = NULL; - lex->lookbehindlen = 0; -} +static int +update_line(uc_lexer_t *lex, int ch) { + if (ch == '\n' || ch == EOF) + uc_source_line_next(lex->source); + else + uc_source_line_update(lex->source, 1); -static uc_token_t * -lookbehind_to_text(uc_lexer_t *lex, uint32_t pos, int type, const char *strip_trailing_chars) { - uc_token_t *rv = NULL; + lex->source->off++; - if (lex->lookbehind) { - if (strip_trailing_chars) { - while (lex->lookbehindlen > 0 && strchr(strip_trailing_chars, lex->lookbehind[lex->lookbehindlen-1])) - lex->lookbehindlen--; - } + return ch; +} - rv = emit_op(lex, pos, type, ucv_string_new_length(lex->lookbehind, lex->lookbehindlen)); +static int +lookahead_char(uc_lexer_t *lex) { + int c; - lookbehind_reset(lex); - } + if (lex->rpos < lex->rlen) + return (int)lex->rbuf[lex->rpos]; - return rv; -} + c = fill_buf(lex); + lex->rpos = 0; -static inline size_t -buf_remaining(uc_lexer_t *lex) { - return (lex->bufend - lex->bufstart); + return c; } -static inline bool -_buf_startswith(uc_lexer_t *lex, const char *str, size_t len) { - return (buf_remaining(lex) >= len && !strncmp(lex->bufstart, str, len)); -} +static bool +check_char(uc_lexer_t *lex, int ch) { + if (lookahead_char(lex) != ch) + return false; -#define buf_startswith(s, str) _buf_startswith(s, str, sizeof(str) - 1) + lex->rpos++; + update_line(lex, ch); -static void -buf_consume(uc_lexer_t *lex, size_t len) { - size_t i, linelen; + return true; +} - for (i = 0, linelen = 0; i < len; i++) { - if (lex->bufstart[i] == '\n') { - uc_source_line_update(lex->source, linelen); - uc_source_line_next(lex->source); +static int +next_char(uc_lexer_t *lex) { + int ch = (lex->rpos < lex->rlen) ? (int)lex->rbuf[lex->rpos++] : fill_buf(lex); - linelen = 0; - } - else { - linelen++; - } - } + return update_line(lex, ch); +} + +static uc_token_t * +emit_op(uc_lexer_t *lex, ssize_t pos, int type, uc_value_t *uv) +{ + lex->curr.type = type; + lex->curr.uv = uv; - if (linelen) - uc_source_line_update(lex->source, linelen); + if (pos < 0) + lex->curr.pos = lex->source->off + pos; + else + lex->curr.pos = (size_t)pos; - lex->bufstart += len; - lex->source->off += len; + return &lex->curr; } static uc_token_t * -parse_comment(uc_lexer_t *lex) -{ - const struct token *tok = lex->tok; - const char *ptr, *end; - size_t elen; +emit_buffer(uc_lexer_t *lex, ssize_t pos, int type, const char *strip_trailing_chars) { + uc_token_t *rv = NULL; + + if (lex->buffer.count) { + if (strip_trailing_chars) + while (lex->buffer.count > 0 && strchr(strip_trailing_chars, *uc_vector_last(&lex->buffer))) + lex->buffer.count--; + + rv = emit_op(lex, pos, type, ucv_string_new_length(uc_vector_first(&lex->buffer), lex->buffer.count)); - if (!strcmp(tok->u.pat, "//")) { - end = "\n"; - elen = 1; + uc_vector_clear(&lex->buffer); } - else { - end = "*/"; - elen = 2; + else if (type != TK_TEXT) { + rv = emit_op(lex, pos, type, ucv_string_new_length("", 0)); } - for (ptr = lex->bufstart; ptr < lex->bufend - elen; ptr++) { - if (!strncmp(ptr, end, elen)) { - buf_consume(lex, (ptr - lex->bufstart) + elen); + return rv; +} - return UC_LEX_CONTINUE_PARSING; - } - } - buf_consume(lex, ptr - lex->bufstart); +static uc_token_t * +parse_comment(uc_lexer_t *lex, int kind) +{ + int ch; + + while (true) { + ch = next_char(lex); - if (lex->eof) { - lex->state = UC_LEX_EOF; + if (kind == '/' && (ch == '\n' || ch == EOF)) + break; + + if (kind == '*' && ch == '*' && check_char(lex, '/')) + break; + + if (ch == EOF) { + lex->state = UC_LEX_EOF; - if (elen == 2) return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated comment")); + } } return NULL; @@ -301,238 +204,157 @@ append_utf8(uc_lexer_t *lex, int code) { rem = sizeof(ustr); if (utf8enc(&up, &rem, code)) - lookbehind_append(lex, ustr, up - ustr); + for (up = ustr; rem < (int)sizeof(ustr); rem++) + uc_vector_push(&lex->buffer, *up++); } static uc_token_t * -parse_string(uc_lexer_t *lex) +parse_string(uc_lexer_t *lex, int kind) { - const struct token *tok = lex->tok; - char q = tok->u.pat[0]; - char *ptr, *c; - uc_token_t *rv; - int code; + int code, ch, i; + unsigned type; + size_t off; - if (!buf_remaining(lex)) - return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated string")); + if (kind == '`') + type = TK_TEMPLATE; + else if (kind == '/') + type = TK_REGEXP; + else + type = TK_STRING; - for (ptr = lex->bufstart; ptr < lex->bufend; ptr++) { - /* continuation of placeholder start */ - if (lex->is_placeholder) { - if (*ptr == '{') { - buf_consume(lex, 1); - rv = lookbehind_to_text(lex, lex->lastoff, tok->type, NULL); + off = lex->source->off - 1; - if (!rv) - rv = emit_op(lex, lex->lastoff, tok->type, ucv_string_new_length("", 0)); + for (ch = next_char(lex); ch != EOF; ch = next_char(lex)) { + switch (ch) { + /* placeholder */ + case '$': + if (type == TK_TEMPLATE && check_char(lex, '{')) { + lex->state = UC_LEX_PLACEHOLDER_START; - return rv; + return emit_buffer(lex, off, type, NULL); } - lex->is_placeholder = false; - lookbehind_append(lex, "$", 1); - } + uc_vector_push(&lex->buffer, '$'); + break; - /* continuation of escape sequence */ - if (lex->is_escape) { - if (lex->esclen == 0) { - /* non-unicode escape following a lead surrogate, emit replacement... */ - if (lex->lead_surrogate && *ptr != 'u') { - append_utf8(lex, 0xFFFD); - lex->lead_surrogate = 0; - } + /* escape sequence */ + case '\\': + /* unicode escape sequence */ + if (type != TK_REGEXP && check_char(lex, 'u')) { + for (i = 0, code = 0; i < 4; i++) { + ch = next_char(lex); - switch ((q == '/') ? 0 : *ptr) { - case 'u': - case 'x': - lex->esc[lex->esclen++] = *ptr; - break; + if (!isxdigit(ch)) + return emit_op(lex, -1, TK_ERROR, ucv_string_new("Invalid escape sequence")); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - lex->esc[lex->esclen++] = 'o'; - lex->esc[lex->esclen++] = *ptr; - break; + code = code * 16 + hex(ch); + } - default: - lex->is_escape = false; - c = strchr("a\ab\be\033f\fn\nr\rt\tv\v", *ptr); + /* is a leading surrogate value */ + if ((code & 0xFC00) == 0xD800) { + /* found a subsequent leading surrogate, ignore and emit replacement char for previous one */ + if (lex->lead_surrogate) + append_utf8(lex, 0xFFFD); - if (c && *c >= 'a') { - lookbehind_append(lex, c + 1, 1); + /* store surrogate value and advance to next escape sequence */ + lex->lead_surrogate = code; + } + + /* is a trailing surrogate value */ + else if ((code & 0xFC00) == 0xDC00) { + /* found a trailing surrogate following a leading one, combine and encode */ + if (lex->lead_surrogate) { + code = 0x10000 + ((lex->lead_surrogate & 0x3FF) << 10) + (code & 0x3FF); + lex->lead_surrogate = 0; } - else { - /* regex mode => retain backslash */ - if (q == '/') - lookbehind_append(lex, "\\", 1); - lookbehind_append(lex, ptr, 1); + /* trailing surrogate not following a leading one, ignore and use replacement char */ + else { + code = 0xFFFD; } - buf_consume(lex, (ptr + 1) - lex->bufstart); + append_utf8(lex, code); + } - break; + /* is a normal codepoint */ + else { + append_utf8(lex, code); } } - else { - switch (lex->esc[0]) { - case 'u': - if (lex->esclen < 5) { - if (!isxdigit(*ptr)) - return emit_op(lex, lex->source->off + lex->esclen + 1, TK_ERROR, ucv_string_new("Invalid escape sequence")); - lex->esc[lex->esclen++] = *ptr; - } - - if (lex->esclen == 5) { - code = hex(lex->esc[1]) * 16 * 16 * 16 + - hex(lex->esc[2]) * 16 * 16 + - hex(lex->esc[3]) * 16 + - hex(lex->esc[4]); - - /* is a leading surrogate value */ - if ((code & 0xFC00) == 0xD800) { - /* found a subsequent leading surrogate, ignore and emit replacement char for previous one */ - if (lex->lead_surrogate) - append_utf8(lex, 0xFFFD); - - /* store surrogate value and advance to next escape sequence */ - lex->lead_surrogate = code; - } - - /* is a trailing surrogate value */ - else if ((code & 0xFC00) == 0xDC00) { - /* found a trailing surrogate following a leading one, combine and encode */ - if (lex->lead_surrogate) { - code = 0x10000 + ((lex->lead_surrogate & 0x3FF) << 10) + (code & 0x3FF); - lex->lead_surrogate = 0; - } - - /* trailing surrogate not following a leading one, ignore and use replacement char */ - else { - code = 0xFFFD; - } - - append_utf8(lex, code); - } - - /* is a normal codepoint */ - else { - append_utf8(lex, code); - } - - lex->esclen = 0; - lex->is_escape = false; - buf_consume(lex, (ptr + 1) - lex->bufstart); - } + /* hex escape sequence */ + else if (type != TK_REGEXP && check_char(lex, 'x')) { + for (i = 0, code = 0; i < 2; i++) { + ch = next_char(lex); - break; + if (!isxdigit(ch)) + return emit_op(lex, -1, TK_ERROR, ucv_string_new("Invalid escape sequence")); - case 'x': - if (lex->esclen < 3) { - if (!isxdigit(*ptr)) - return emit_op(lex, lex->source->off + lex->esclen + 1, TK_ERROR, ucv_string_new("Invalid escape sequence")); + code = code * 16 + hex(ch); + } - lex->esc[lex->esclen++] = *ptr; - } + append_utf8(lex, code); + } - if (lex->esclen == 3) { - append_utf8(lex, hex(lex->esc[1]) * 16 + hex(lex->esc[2])); + /* octal or letter */ + else { + /* try to parse octal sequence... */ + for (i = 0, code = 0, ch = lookahead_char(lex); + kind != '/' && i < 3 && ch >= '0' && ch <= '7'; + i++, next_char(lex), ch = lookahead_char(lex)) { + code = code * 8 + dec(ch); + } - lex->esclen = 0; - lex->is_escape = false; - buf_consume(lex, (ptr + 1) - lex->bufstart); - } + if (i) { + if (code > 255) + return emit_op(lex, -3, TK_ERROR, ucv_string_new("Invalid escape sequence")); - break; + append_utf8(lex, code); + } - case 'o': - if (lex->esclen < 4) { - /* found a non-octal char */ - if (*ptr < '0' || *ptr > '7') { - /* pad sequence to three chars */ - switch (lex->esclen) { - case 3: - lex->esc[3] = lex->esc[2]; - lex->esc[2] = lex->esc[1]; - lex->esc[1] = '0'; - break; - - case 2: - lex->esc[3] = lex->esc[1]; - lex->esc[2] = '0'; - lex->esc[1] = '0'; - break; - } - - lex->esclen = 4; - buf_consume(lex, ptr-- - lex->bufstart); - } - - /* append */ - else { - lex->esc[lex->esclen++] = *ptr; - buf_consume(lex, (ptr + 1) - lex->bufstart); - } - } + /* ... no octal sequence, handle other escape */ + else { + ch = next_char(lex); - if (lex->esclen == 4) { - code = dec(lex->esc[1]) * 8 * 8 + - dec(lex->esc[2]) * 8 + - dec(lex->esc[3]); + switch (ch) { + case 'a': uc_vector_push(&lex->buffer, '\a'); break; + case 'b': uc_vector_push(&lex->buffer, '\b'); break; + case 'e': uc_vector_push(&lex->buffer, '\033'); break; + case 'f': uc_vector_push(&lex->buffer, '\f'); break; + case 'n': uc_vector_push(&lex->buffer, '\n'); break; + case 'r': uc_vector_push(&lex->buffer, '\r'); break; + case 't': uc_vector_push(&lex->buffer, '\t'); break; + case 'v': uc_vector_push(&lex->buffer, '\v'); break; - if (code > 255) - return emit_op(lex, lex->source->off + lex->esclen + 1, TK_ERROR, ucv_string_new("Invalid escape sequence")); + case EOF: + return emit_op(lex, -2, TK_ERROR, ucv_string_new("Unterminated string")); - append_utf8(lex, code); + default: + /* regex mode => retain backslash */ + if (type == TK_REGEXP) + uc_vector_push(&lex->buffer, '\\'); - lex->esclen = 0; - lex->is_escape = false; + uc_vector_push(&lex->buffer, ch); } - - break; } } - } - - /* terminating char */ - else if (*ptr == q) { - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr + 1) - lex->bufstart); - - rv = lookbehind_to_text(lex, lex->lastoff, tok->type, NULL); - if (!rv) - rv = emit_op(lex, lex->lastoff, tok->type, ucv_string_new_length("", 0)); - - return rv; - } + break; - /* escape sequence start */ - else if (*ptr == '\\') { - lex->is_escape = true; - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr - lex->bufstart) + 1); - } + /* other character */ + default: + /* terminating delimitter */ + if (ch == kind) + return emit_buffer(lex, off, type, NULL); - /* potential placeholder start */ - else if (q == '`' && *ptr == '$') { - lex->is_placeholder = true; - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr - lex->bufstart) + 1); + uc_vector_push(&lex->buffer, ch); } } - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, ptr - lex->bufstart); + // FIXME + lex->state = UC_LEX_EOF; - return NULL; + return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated string")); } @@ -563,76 +385,31 @@ parse_regexp(uc_lexer_t *lex) size_t len; char *s; - switch (lex->esc[0]) { - case UC_LEX_PARSE_REGEX_INIT: - if (lex->no_regexp) { - if (buf_startswith(lex, "=")) { - buf_consume(lex, 1); - - return emit_op(lex, lex->source->off, TK_ASDIV, NULL); - } - - return emit_op(lex, lex->source->off, TK_DIV, NULL); - } - - lex->esc[0] = UC_LEX_PARSE_REGEX_PATTERN; - break; - - case UC_LEX_PARSE_REGEX_PATTERN: - rv = parse_string(lex); - - if (rv && rv->type == TK_ERROR) - return rv; + rv = parse_string(lex, '/'); - if (rv != NULL && rv != UC_LEX_CONTINUE_PARSING) { - lex->lookbehind = (char *)rv; - lex->esc[0] = UC_LEX_PARSE_REGEX_FLAGS; - } - - break; - - case UC_LEX_PARSE_REGEX_FLAGS: - rv = (uc_token_t *)lex->lookbehind; - - while (lex->bufstart < lex->bufend || lex->eof) { - switch (lex->eof ? EOF : lex->bufstart[0]) { - case 'g': - buf_consume(lex, 1); + if (rv->type == TK_REGEXP) { + while (true) { + if (check_char(lex, 'g')) is_reg_global = true; - break; - - case 'i': - buf_consume(lex, 1); + else if (check_char(lex, 'i')) is_reg_icase = true; - break; - - case 's': - buf_consume(lex, 1); + else if (check_char(lex, 's')) is_reg_newline = true; + else break; - - default: - lex->lookbehind = NULL; - - len = xasprintf(&s, "%c%*s", - (is_reg_global << 0) | (is_reg_icase << 1) | (is_reg_newline << 2), - ucv_string_length(rv->uv), - ucv_string_get(rv->uv)); - - ucv_free(rv->uv, false); - rv->uv = ucv_string_new_length(s, len); - free(s); - - rv->type = TK_REGEXP; - - return rv; - } } - break; + len = xasprintf(&s, "%c%*s", + (is_reg_global << 0) | (is_reg_icase << 1) | (is_reg_newline << 2), + ucv_string_length(rv->uv), + ucv_string_get(rv->uv)); + + ucv_free(rv->uv, false); + rv->uv = ucv_string_new_length(s, len); + free(s); } - return NULL; + return rv; } @@ -647,37 +424,34 @@ parse_regexp(uc_lexer_t *lex) */ static uc_token_t * -parse_label(uc_lexer_t *lex) +parse_label(uc_lexer_t *lex, int ch) { - const struct token *tok = lex->tok; const struct keyword *word; - char *ptr; - size_t i; - - if (!lex->lookbehind && tok->plen) - lookbehind_append(lex, tok->u.pat, tok->plen); + size_t i, len; - if (!buf_remaining(lex) || (lex->bufstart[0] != '_' && !isalnum(lex->bufstart[0]))) { - if (lex->no_keyword == false) { - for (i = 0, word = &reserved_words[0]; i < ARRAY_SIZE(reserved_words); i++, word = &reserved_words[i]) { - if (lex->lookbehind && lex->lookbehindlen == word->plen && !strncmp(lex->lookbehind, word->pat, word->plen)) { - lookbehind_reset(lex); + while (true) { + uc_vector_push(&lex->buffer, ch); + ch = lookahead_char(lex); - return emit_op(lex, lex->source->off - word->plen, word->type, NULL); - } - } - } + if (!isalnum(ch) && ch != '_') + break; - return lookbehind_to_text(lex, lex->source->off - lex->lookbehindlen, TK_LABEL, NULL); + next_char(lex); } - for (ptr = lex->bufstart; ptr < lex->bufend && (*ptr == '_' || isalnum(*ptr)); ptr++) - ; + len = lex->buffer.count; - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, ptr - lex->bufstart); + if (!lex->no_keyword) { + for (i = 0, word = &reserved_words[0]; i < ARRAY_SIZE(reserved_words); i++, word = &reserved_words[i]) { + if (lex->buffer.count == word->plen && !strncmp(uc_vector_first(&lex->buffer), word->pat, word->plen)) { + uc_vector_clear(&lex->buffer); - return NULL; + return emit_op(lex, -len, word->type, NULL); + } + } + } + + return emit_buffer(lex, -len, TK_LABEL, NULL); } @@ -694,7 +468,7 @@ parse_label(uc_lexer_t *lex) static inline bool is_numeric_char(uc_lexer_t *lex, char c) { - char prev = lex->lookbehindlen ? lex->lookbehind[lex->lookbehindlen-1] : 0; + char prev = lex->buffer.count ? *uc_vector_last(&lex->buffer) : 0; switch (c|32) { case '.': @@ -731,380 +505,507 @@ is_numeric_char(uc_lexer_t *lex, char c) } static uc_token_t * -parse_number(uc_lexer_t *lex) +parse_number(uc_lexer_t *lex, int ch) { - uc_token_t *rv = NULL; uc_value_t *nv = NULL; - const char *ptr; + size_t len; char *e; - if (!buf_remaining(lex) || !is_numeric_char(lex, lex->bufstart[0])) { - lookbehind_append(lex, "\0", 1); - - nv = uc_number_parse_octal(lex->lookbehind, &e); + while (true) { + uc_vector_push(&lex->buffer, ch); + ch = lookahead_char(lex); - switch (ucv_type(nv)) { - case UC_DOUBLE: - rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_DOUBLE, nv); + if (!is_numeric_char(lex, ch)) break; - case UC_INTEGER: - rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_NUMBER, nv); - break; + next_char(lex); + } - default: - rv = emit_op(lex, lex->source->off - (lex->lookbehindlen - (e - lex->lookbehind) - 1), TK_ERROR, ucv_string_new("Invalid number literal")); - } + len = lex->buffer.count; - lookbehind_reset(lex); + uc_vector_push(&lex->buffer, '\0'); - return rv; - } + nv = uc_number_parse_octal(uc_vector_first(&lex->buffer), &e); - for (ptr = lex->bufstart; ptr < lex->bufend && is_numeric_char(lex, *ptr); ptr++) - ; + uc_vector_clear(&lex->buffer); - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, ptr - lex->bufstart); + switch (ucv_type(nv)) { + case UC_DOUBLE: + return emit_op(lex, -len, TK_DOUBLE, nv); - return NULL; + case UC_INTEGER: + return emit_op(lex, -len, TK_NUMBER, nv); + + default: + return emit_op(lex, -len, TK_ERROR, ucv_string_new("Invalid number literal")); + } } static uc_token_t * -lex_step(uc_lexer_t *lex, FILE *fp) +lex_find_token(uc_lexer_t *lex) { - uint32_t masks[] = { 0, le32toh(0x000000ff), le32toh(0x0000ffff), le32toh(0x00ffffff), le32toh(0xffffffff) }; - union { uint32_t n; char str[4]; } search; - const struct token *tok; - size_t rlen, rem, *nest; - char *ptr, c; - uc_token_t *rv; - size_t i; + bool tpl = !(lex->config && lex->config->raw_mode); + int ch = next_char(lex); + + while (isspace(ch)) + ch = next_char(lex); + + switch (ch) { + case '~': + return emit_op(lex, -1, TK_COMPL, NULL); - /* only less than UC_LEX_MAX_TOKEN_LEN unread buffer chars remaining, - * move the remaining bytes to the beginning and read more data */ - if (buf_remaining(lex) < UC_LEX_MAX_TOKEN_LEN) { - if (!lex->buf) { - lex->buflen = 128; - lex->buf = xalloc(lex->buflen); + case '}': + if (tpl && check_char(lex, '}')) + return emit_op(lex, -2, TK_REXP, NULL); + + return emit_op(lex, -1, TK_RBRACE, NULL); + + case '|': + if (check_char(lex, '|')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_ASOR, NULL); + + return emit_op(lex, -2, TK_OR, NULL); } - rem = lex->bufend - lex->bufstart; + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASBOR, NULL); - if (rem) - memcpy(lex->buf, lex->bufstart, rem); + return emit_op(lex, -1, TK_BOR, NULL); - rlen = fread(lex->buf + rem, 1, lex->buflen - rem, fp); + case '{': + if (tpl && check_char(lex, '{')) + return emit_op(lex, -2, TK_LEXP, NULL); - lex->bufstart = lex->buf; - lex->bufend = lex->buf + rlen + rem; + if (tpl && check_char(lex, '%')) + return emit_op(lex, -2, TK_LSTM, NULL); - if (rlen == 0 && (ferror(fp) || feof(fp))) - lex->eof = 1; - } + return emit_op(lex, -1, TK_LBRACE, NULL); - switch (lex->state) { - case UC_LEX_IDENTIFY_BLOCK: - /* previous block had strip trailing whitespace flag, skip leading whitespace */ - if (lex->modifier == MINUS) { - while (buf_remaining(lex) && isspace(lex->bufstart[0])) - buf_consume(lex, 1); + case '^': + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASBXOR, NULL); - lex->modifier = UNSPEC; + return emit_op(lex, -1, TK_BXOR, NULL); + + case '[': + return emit_op(lex, -1, TK_LBRACK, NULL); + + case ']': + return emit_op(lex, -1, TK_RBRACK, NULL); + + case '?': + if (check_char(lex, '?')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_ASNULLISH, NULL); + + return emit_op(lex, -2, TK_NULLISH, NULL); } - /* previous block was a statement block and trim_blocks is enabld, skip leading newline */ - else if (lex->modifier == NEWLINE) { - if (buf_startswith(lex, "\n")) - buf_consume(lex, 1); + if (check_char(lex, '.')) { + if (check_char(lex, '[')) + return emit_op(lex, -3, TK_QLBRACK, NULL); + + if (check_char(lex, '(')) + return emit_op(lex, -3, TK_QLPAREN, NULL); - lex->modifier = UNSPEC; + return emit_op(lex, -2, TK_QDOT, NULL); } - /* scan forward through buffer to identify start token */ - for (ptr = lex->bufstart; ptr < lex->bufend - strlen("{#"); ptr++) { - /* found start of comment block */ - if (!strncmp(ptr, "{#", 2)) { - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr + 2) - lex->bufstart); - lex->lastoff = lex->source->off - 2; - lex->state = UC_LEX_BLOCK_COMMENT_START; + return emit_op(lex, lex->source->off, TK_QMARK, NULL); - return NULL; - } + case '>': + if (check_char(lex, '>')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_ASRIGHT, NULL); - /* found start of expression block */ - else if (!strncmp(ptr, "{{", 2)) { - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr + 2) - lex->bufstart); - lex->lastoff = lex->source->off - 2; - lex->state = UC_LEX_BLOCK_EXPRESSION_START; + return emit_op(lex, -2, TK_RSHIFT, NULL); + } - return NULL; - } + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_GE, NULL); - /* found start of statement block */ - else if (!strncmp(ptr, "{%", 2)) { - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr + 2) - lex->bufstart); - lex->lastoff = lex->source->off - 2; - lex->state = UC_LEX_BLOCK_STATEMENT_START; + return emit_op(lex, -1, TK_GT, NULL); - return NULL; - } + case '=': + if (check_char(lex, '=')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_EQS, NULL); + + return emit_op(lex, -2, TK_EQ, NULL); } - /* we're at eof */ - if (lex->eof) { - lookbehind_append(lex, ptr, lex->bufend - ptr); - lex->state = UC_LEX_EOF; + if (check_char(lex, '>')) + return emit_op(lex, -2, TK_ARROW, NULL); + + return emit_op(lex, -1, TK_ASSIGN, NULL); + + case '<': + if (check_char(lex, '<')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_ASLEFT, NULL); - return lookbehind_to_text(lex, lex->lastoff, TK_TEXT, NULL); + return emit_op(lex, -2, TK_LSHIFT, NULL); } - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, ptr - lex->bufstart); - break; + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_LE, NULL); + + return emit_op(lex, -1, TK_LT, NULL); + + case ';': + return emit_op(lex, -1, TK_SCOL, NULL); + case ':': + return emit_op(lex, -1, TK_COLON, NULL); - case UC_LEX_BLOCK_COMMENT_START: - case UC_LEX_BLOCK_EXPRESSION_START: - case UC_LEX_BLOCK_STATEMENT_START: - rv = NULL; - lex->modifier = UNSPEC; + case '/': + ch = lookahead_char(lex); + lex->lastoff = lex->source->off - 1; - /* strip whitespace before block */ - if (buf_startswith(lex, "-")) { - rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, " \n\t\v\f\r"); - buf_consume(lex, 1); + if (ch == '/' || ch == '*') + return parse_comment(lex, ch); + + if (lex->no_regexp) { + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASDIV, NULL); + + return emit_op(lex, -1, TK_DIV, NULL); } - /* disable lstrip flag (only valid for statement blocks) */ - else if (lex->state == UC_LEX_BLOCK_STATEMENT_START) { - /* disable lstrip flag */ - if (buf_startswith(lex, "+")) { - rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, NULL); - buf_consume(lex, 1); - } + return parse_regexp(lex); - /* put out text leading up to the opening tag and potentially - * strip trailing white space from it depending on the global - * block lstrip setting */ - else { - rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, - (lex->config && lex->config->lstrip_blocks) ? " \t\v\f\r" : NULL); + case '.': + if (check_char(lex, '.')) { + if (check_char(lex, '.')) + return emit_op(lex, -3, TK_ELLIP, NULL); + + /* The sequence ".." cannot be a valid */ + return emit_op(lex, -2, TK_ERROR, ucv_string_new("Unexpected character")); + } + + return emit_op(lex, -1, TK_DOT, NULL); + + case '-': + if (tpl && check_char(lex, '}')) { + if (check_char(lex, '}')) { + lex->modifier = MINUS; + + return emit_op(lex, -3, TK_REXP, NULL); } + + /* The sequence "-}" cannot be a valid */ + return emit_op(lex, -1, TK_ERROR, ucv_string_new("Unexpected character")); } - else { - rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, NULL); + + if (tpl && check_char(lex, '%')) { + if (check_char(lex, '}')) { + lex->modifier = MINUS; + + return emit_op(lex, -3, TK_RSTM, NULL); + } + + /* The sequence "-%" cannot be a valid */ + return emit_op(lex, -1, TK_ERROR, ucv_string_new("Unexpected character")); } - switch (lex->state) { - case UC_LEX_BLOCK_COMMENT_START: - lex->state = UC_LEX_BLOCK_COMMENT; - lex->block = COMMENT; - break; + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASSUB, NULL); - case UC_LEX_BLOCK_STATEMENT_START: - lex->state = UC_LEX_IDENTIFY_TOKEN; - lex->block = STATEMENTS; - break; + if (check_char(lex, '-')) + return emit_op(lex, -2, TK_DEC, NULL); - case UC_LEX_BLOCK_EXPRESSION_START: - lex->state = UC_LEX_BLOCK_EXPRESSION_EMIT_TAG; - break; + return emit_op(lex, -1, TK_SUB, NULL); - default: - break; + case ',': + return emit_op(lex, -1, TK_COMMA, NULL); + + case '+': + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASADD, NULL); + + if (check_char(lex, '+')) + return emit_op(lex, -2, TK_INC, NULL); + + return emit_op(lex, -1, TK_ADD, NULL); + + case '*': + if (check_char(lex, '*')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_ASEXP, NULL); + + return emit_op(lex, -2, TK_EXP, NULL); } - return rv; + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASMUL, NULL); + return emit_op(lex, -1, TK_MUL, NULL); - case UC_LEX_BLOCK_COMMENT: - /* scan forward through buffer to identify end token */ - while (lex->bufstart < lex->bufend - 2) { - if (buf_startswith(lex, "-#}")) { - lex->state = UC_LEX_IDENTIFY_BLOCK; - lex->modifier = MINUS; - buf_consume(lex, 3); - lex->lastoff = lex->source->off; - break; - } - else if (buf_startswith(lex, "#}")) { - lex->state = UC_LEX_IDENTIFY_BLOCK; - buf_consume(lex, 2); - lex->lastoff = lex->source->off; - break; - } + case '(': + return emit_op(lex, -1, TK_LPAREN, NULL); + + case ')': + return emit_op(lex, -1, TK_RPAREN, NULL); - buf_consume(lex, 1); + case '\'': + case '"': + case '`': + lex->lastoff = lex->source->off - 1; + + return parse_string(lex, ch); + + case '&': + if (check_char(lex, '&')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_ASAND, NULL); + + return emit_op(lex, -2, TK_AND, NULL); } - /* we're at eof */ - if (lex->eof) { - lex->state = UC_LEX_EOF; + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASBAND, NULL); + + return emit_op(lex, -1, TK_BAND, NULL); - buf_consume(lex, lex->bufend - lex->bufstart); + case '%': + if (tpl && check_char(lex, '}')) + return emit_op(lex, -2, TK_RSTM, NULL); - return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated template block")); + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASMOD, NULL); + + return emit_op(lex, -1, TK_MOD, NULL); + + case '!': + if (check_char(lex, '=')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_NES, NULL); + + return emit_op(lex, -2, TK_NE, NULL); } - break; + return emit_op(lex, -1, TK_NOT, NULL); + case EOF: + return emit_op(lex, -1, TK_EOF, NULL); - case UC_LEX_BLOCK_EXPRESSION_EMIT_TAG: - lex->state = UC_LEX_IDENTIFY_TOKEN; - lex->block = EXPRESSION; + default: + if (isalpha(ch) || ch == '_') + return parse_label(lex, ch); - return emit_op(lex, lex->source->off, TK_LEXP, NULL); + if (isdigit(ch)) + return parse_number(lex, ch); + return emit_op(lex, -1, TK_ERROR, ucv_string_new("Unexpected character")); + } +} - case UC_LEX_IDENTIFY_TOKEN: - /* skip leading whitespace */ - for (i = 0; i < buf_remaining(lex) && isspace(lex->bufstart[i]); i++) - ; +static uc_token_t * +lex_step(uc_lexer_t *lex) +{ + const char *strip = NULL; + uc_token_t *tok; + size_t *nest; + int ch; - buf_consume(lex, i); + while (lex->state != UC_LEX_EOF) { + switch (lex->state) { + case UC_LEX_IDENTIFY_BLOCK: + ch = next_char(lex); - if (i > 0 && buf_remaining(lex) < UC_LEX_MAX_TOKEN_LEN) - return NULL; + /* previous block had strip trailing whitespace flag, skip leading whitespace */ + if (lex->modifier == MINUS) { + while (isspace(ch)) + ch = next_char(lex); - for (i = 0; i < sizeof(search.str); i++) - search.str[i] = (i < buf_remaining(lex)) ? lex->bufstart[i] : 0; + lex->modifier = UNSPEC; + } - for (i = 0, tok = tokens; i < ARRAY_SIZE(tokens); tok = &tokens[++i]) { - /* remaining buffer data is shorter than token, skip */ - if (tok->plen > buf_remaining(lex)) - continue; + /* previous block was a statement block and trim_blocks is enabled, skip leading newline */ + else if (lex->modifier == NEWLINE) { + if (ch == '\n') + ch = next_char(lex); - c = buf_remaining(lex) ? lex->bufstart[0] : 0; + lex->modifier = UNSPEC; + } - if (tok->plen ? ((search.n & masks[tok->plen]) == tok->u.patn) - : (c >= tok->u.pat[0] && c <= tok->u.pat[1])) { - lex->lastoff = lex->source->off; + /* scan forward through buffer to identify block start token */ + while (ch != EOF) { + if (ch == '{') { + ch = next_char(lex); - /* token has a parse method, switch state */ - if (tok->parse) { - lex->tok = tok; - lex->state = UC_LEX_PARSE_TOKEN; + switch (ch) { + /* found start of comment block */ + case '#': + lex->state = UC_LEX_BLOCK_COMMENT; + lex->block = COMMENT; - buf_consume(lex, tok->plen); + if (check_char(lex, '-')) + strip = " \n\t\v\f\r"; - return NULL; - } + break; - /* in raw code mode, ignore template tag tokens */ - if (lex->config && lex->config->raw_mode && - (tok->type == TK_LSTM || tok->type == TK_RSTM || - tok->type == TK_LEXP || tok->type == TK_REXP)) { - continue; - } + /* found start of expression block */ + case '{': + lex->state = UC_LEX_BLOCK_EXPRESSION_EMIT_TAG; - /* disallow nesting blocks */ - if (tok->type == TK_LSTM || tok->type == TK_LEXP) { - buf_consume(lex, tok->plen); + if (check_char(lex, '-')) + strip = " \n\t\v\f\r"; - return emit_op(lex, lex->source->off - tok->plen, TK_ERROR, ucv_string_new("Template blocks may not be nested")); - } + break; - /* found end of block */ - else if ((lex->block == STATEMENTS && tok->type == TK_RSTM) || - (lex->block == EXPRESSION && tok->type == TK_REXP)) { - /* strip whitespace after block */ - if (tok->u.pat[0] == '-') - lex->modifier = MINUS; + /* found start of statement block */ + case '%': + lex->state = UC_LEX_IDENTIFY_TOKEN; + lex->block = STATEMENTS; - /* strip newline after statement block */ - else if (lex->block == STATEMENTS && - lex->config && lex->config->trim_blocks) - lex->modifier = NEWLINE; + if (check_char(lex, '-')) + strip = " \n\t\v\f\r"; + else if (check_char(lex, '+')) + strip = NULL; + else if (lex->config && lex->config->lstrip_blocks) + strip = " \t\v\f\r"; - lex->state = UC_LEX_IDENTIFY_BLOCK; - lex->block = NONE; - } + break; + + default: + /* not a start tag, remember char and move on */ + uc_vector_push(&lex->buffer, '{'); + continue; + } - /* track opening braces */ - else if (tok->type == TK_LBRACE && lex->templates.count > 0) { - nest = uc_vector_last(&lex->templates); - (*nest)++; + break; } - /* check end of placeholder expression */ - else if (tok->type == TK_RBRACE && lex->templates.count > 0) { - nest = uc_vector_last(&lex->templates); + uc_vector_push(&lex->buffer, ch); + ch = next_char(lex); + } - if (*nest == 0) { - lex->templates.count--; - lex->state = UC_LEX_PARSE_TOKEN; - lex->tok = &tokens[ARRAY_SIZE(tokens) - 1]; /* NB: TK_TEMPLATE token spec */ - } - else { - (*nest)--; - } + if (ch == EOF) + lex->state = UC_LEX_EOF; + + /* push out leading text */ + tok = emit_buffer(lex, lex->lastoff, TK_TEXT, strip); + lex->lastoff = lex->source->off - 2; + + if (!tok) + continue; + + return tok; + + + case UC_LEX_BLOCK_COMMENT: + ch = next_char(lex); + + /* scan forward through buffer to identify end token */ + while (ch != EOF) { + if (ch == '-' && check_char(lex, '#') && check_char(lex, '}')) { + lex->modifier = MINUS; + break; } - /* do not report statement tags to the parser */ - if (tok->type != 0 && tok->type != TK_LSTM) - rv = emit_op(lex, lex->source->off, - (tok->type == TK_RSTM) ? TK_SCOL : tok->type, NULL); - else - rv = NULL; + if (ch == '#' && check_char(lex, '}')) + break; + + ch = next_char(lex); + } - buf_consume(lex, tok->plen); + if (ch == EOF) { + lex->state = UC_LEX_EOF; - return rv; + return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated template block")); } - } - /* no possible return beyond this point can advance, - mark lex state as eof */ - lex->state = UC_LEX_EOF; + lex->lastoff = lex->source->off; + lex->state = UC_LEX_IDENTIFY_BLOCK; - /* no token matched and we do have remaining data, junk */ - if (buf_remaining(lex)) - return emit_op(lex, lex->source->off, TK_ERROR, ucv_string_new("Unexpected character")); + continue; - /* we're at eof, allow unclosed statement blocks */ - if (lex->block == STATEMENTS) - return NULL; - /* premature EOF */ - return emit_op(lex, lex->source->off, TK_ERROR, ucv_string_new("Unterminated template block")); + case UC_LEX_BLOCK_EXPRESSION_EMIT_TAG: + lex->state = UC_LEX_IDENTIFY_TOKEN; + lex->block = EXPRESSION; + return emit_op(lex, lex->source->off, TK_LEXP, NULL); - case UC_LEX_PARSE_TOKEN: - tok = lex->tok; - rv = tok->parse(lex); - if (rv) { - memset(lex->esc, 0, sizeof(lex->esc)); - lex->state = lex->is_placeholder ? UC_LEX_PLACEHOLDER : UC_LEX_IDENTIFY_TOKEN; - lex->is_placeholder = false; - lex->tok = NULL; + case UC_LEX_IDENTIFY_TOKEN: + do { tok = lex_find_token(lex); } while (tok == NULL); - if (rv == UC_LEX_CONTINUE_PARSING) - rv = NULL; + /* disallow nesting blocks */ + if (tok->type == TK_LSTM || tok->type == TK_LEXP) + return emit_op(lex, -2, TK_ERROR, ucv_string_new("Template blocks may not be nested")); - return rv; - } + /* found end of statement block */ + if (lex->block == STATEMENTS && tok->type == TK_RSTM) { + /* strip newline after statement block? */ + if (lex->modifier == UNSPEC && lex->config && lex->config->trim_blocks) + lex->modifier = NEWLINE; - break; + lex->lastoff = lex->source->off; + lex->state = UC_LEX_IDENTIFY_BLOCK; + lex->block = NONE; + tok = emit_op(lex, -2, TK_SCOL, NULL); + } - case UC_LEX_PLACEHOLDER: - lex->state = UC_LEX_IDENTIFY_TOKEN; + /* found end of expression block */ + else if (lex->block == EXPRESSION && tok->type == TK_REXP) { + lex->lastoff = lex->source->off; + lex->state = UC_LEX_IDENTIFY_BLOCK; + lex->block = NONE; + } + + /* track opening braces */ + else if (tok->type == TK_LBRACE && lex->templates.count > 0) { + nest = uc_vector_last(&lex->templates); + (*nest)++; + } + + /* check end of placeholder expression */ + else if (tok->type == TK_RBRACE && lex->templates.count > 0) { + nest = uc_vector_last(&lex->templates); + + if (*nest == 0) { + lex->templates.count--; + lex->state = UC_LEX_PLACEHOLDER_END; + } + else { + (*nest)--; + } + } + + /* premature EOF? */ + else if (tok->type == TK_EOF && lex->block != STATEMENTS) { + lex->state = UC_LEX_EOF; + + return emit_op(lex, -2, TK_ERROR, ucv_string_new("Unterminated template block")); + } - uc_vector_push(&lex->templates, 0); + return tok; - return emit_op(lex, lex->source->off, TK_PLACEH, NULL); + case UC_LEX_PLACEHOLDER_START: + lex->state = UC_LEX_IDENTIFY_TOKEN; + + uc_vector_push(&lex->templates, 0); + + return emit_op(lex, -2, TK_PLACEH, NULL); - case UC_LEX_EOF: - break; + + case UC_LEX_PLACEHOLDER_END: + lex->state = UC_LEX_IDENTIFY_TOKEN; + + return parse_string(lex, '`'); + + + case UC_LEX_EOF: + break; + } } - return NULL; + return emit_op(lex, lex->source->off, TK_EOF, NULL); } void @@ -1115,24 +1016,15 @@ uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source) lex->config = config; lex->source = uc_source_get(source); - lex->eof = 0; - lex->is_escape = 0; - lex->block = NONE; lex->modifier = UNSPEC; - lex->buflen = 0; - lex->buf = NULL; - lex->bufstart = NULL; - lex->bufend = NULL; - - lex->lookbehindlen = 0; - lex->lookbehind = NULL; + lex->rlen = 0; + lex->rpos = 0; + lex->rbuf = NULL; - lex->tok = NULL; - - lex->esclen = 0; - memset(lex->esc, 0, sizeof(lex->esc)); + lex->buffer.count = 0; + lex->buffer.entries = NULL; lex->lead_surrogate = 0; @@ -1150,11 +1042,12 @@ uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source) void uc_lexer_free(uc_lexer_t *lex) { + uc_vector_clear(&lex->buffer); uc_vector_clear(&lex->templates); + uc_source_put(lex->source); - free(lex->lookbehind); - free(lex->buf); + free(lex->rbuf); } uc_token_t * @@ -1162,47 +1055,94 @@ uc_lexer_next_token(uc_lexer_t *lex) { uc_token_t *rv = NULL; - while (lex->state != UC_LEX_EOF) { - rv = lex_step(lex, lex->source->fp); - - if (rv != NULL) - break; - } - - if (rv) { - lex->no_keyword = false; - lex->no_regexp = false; + rv = lex_step(lex); - return rv; - } + lex->no_keyword = false; + lex->no_regexp = false; - return emit_op(lex, lex->source->off, TK_EOF, NULL); + return rv; } const char * uc_tokenname(unsigned type) { static char buf[sizeof("'endfunction'")]; - size_t i; - - switch (type) { - case 0: return "End of file"; - case TK_TEMPLATE: return "Template"; - case TK_STRING: return "String"; - case TK_LABEL: return "Label"; - case TK_NUMBER: return "Number"; - case TK_DOUBLE: return "Double"; - case TK_REGEXP: return "Regexp"; - } - - for (i = 0; i < ARRAY_SIZE(tokens); i++) { - if (tokens[i].type != type) - continue; + const char *tokennames[] = { + [TK_LEXP] = "'{{'", + [TK_REXP] = "'}}'", + [TK_LSTM] = "'{%'", + [TK_RSTM] = "'%}'", + [TK_COMMA] = "','", + [TK_ASSIGN] = "'='", + [TK_ASADD] = "'+='", + [TK_ASSUB] = "'-='", + [TK_ASMUL] = "'*='", + [TK_ASDIV] = "'/='", + [TK_ASMOD] = "'%='", + [TK_ASLEFT] = "'<<='", + [TK_ASRIGHT] = "'>>='", + [TK_ASBAND] = "'&='", + [TK_ASBXOR] = "'^='", + [TK_ASBOR] = "'|='", + [TK_QMARK] = "'?'", + [TK_COLON] = "':'", + [TK_OR] = "'||'", + [TK_AND] = "'&&'", + [TK_BOR] = "'|'", + [TK_BXOR] = "'^'", + [TK_BAND] = "'&'", + [TK_EQS] = "'==='", + [TK_NES] = "'!=='", + [TK_EQ] = "'=='", + [TK_NE] = "'!='", + [TK_LT] = "'<'", + [TK_LE] = "'<='", + [TK_GT] = "'>'", + [TK_GE] = "'>='", + [TK_LSHIFT] = "'<<'", + [TK_RSHIFT] = "'>>'", + [TK_ADD] = "'+'", + [TK_SUB] = "'-'", + [TK_MUL] = "'*'", + [TK_DIV] = "'/'", + [TK_MOD] = "'%'", + [TK_EXP] = "'**'", + [TK_NOT] = "'!'", + [TK_COMPL] = "'~'", + [TK_INC] = "'++'", + [TK_DEC] = "'--'", + [TK_DOT] = "'.'", + [TK_LBRACK] = "'['", + [TK_RBRACK] = "']'", + [TK_LPAREN] = "'('", + [TK_RPAREN] = "')'", + [TK_LBRACE] = "'{'", + [TK_RBRACE] = "'}'", + [TK_SCOL] = "';'", + [TK_ELLIP] = "'...'", + [TK_ARROW] = "'=>'", + [TK_QLBRACK] = "'?.['", + [TK_QLPAREN] = "'?.('", + [TK_QDOT] = "'?.'", + [TK_ASEXP] = "'**='", + [TK_ASAND] = "'&&='", + [TK_ASOR] = "'||='", + [TK_ASNULLISH] = "'\?\?='", + [TK_NULLISH] = "'\?\?'", + [TK_PLACEH] = "'${'", + + [TK_TEXT] = "Text", + [TK_LABEL] = "Label", + [TK_NUMBER] = "Number", + [TK_DOUBLE] = "Double", + [TK_STRING] = "String", + [TK_REGEXP] = "Regexp", + [TK_TEMPLATE] = "Template", + [TK_ERROR] = "Error", + [TK_EOF] = "End of file", + }; - snprintf(buf, sizeof(buf), "'%s'", tokens[i].u.pat); - - return buf; - } + size_t i; for (i = 0; i < ARRAY_SIZE(reserved_words); i++) { if (reserved_words[i].type != type) @@ -1213,7 +1153,7 @@ uc_tokenname(unsigned type) return buf; } - return "?"; + return tokennames[type] ? tokennames[type] : "?"; } bool @@ -369,6 +369,23 @@ uc_rindex(uc_vm_t *vm, size_t nargs) return uc_index(vm, nargs, true); } +static bool +assert_mutable_array(uc_vm_t *vm, uc_value_t *val) +{ + if (ucv_type(val) != UC_ARRAY) + return false; + + if (ucv_is_constant(val)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "%s value is immutable", + ucv_typename(val)); + + return false; + } + + return true; +} + static uc_value_t * uc_push(uc_vm_t *vm, size_t nargs) { @@ -376,7 +393,7 @@ uc_push(uc_vm_t *vm, size_t nargs) uc_value_t *item = NULL; size_t arridx; - if (ucv_type(arr) != UC_ARRAY) + if (!assert_mutable_array(vm, arr)) return NULL; for (arridx = 1; arridx < nargs; arridx++) { @@ -392,6 +409,9 @@ uc_pop(uc_vm_t *vm, size_t nargs) { uc_value_t *arr = uc_fn_arg(0); + if (!assert_mutable_array(vm, arr)) + return NULL; + return ucv_array_pop(arr); } @@ -400,6 +420,9 @@ uc_shift(uc_vm_t *vm, size_t nargs) { uc_value_t *arr = uc_fn_arg(0); + if (!assert_mutable_array(vm, arr)) + return NULL; + return ucv_array_shift(arr); } @@ -410,7 +433,7 @@ uc_unshift(uc_vm_t *vm, size_t nargs) uc_value_t *item = NULL; size_t i; - if (ucv_type(arr) != UC_ARRAY) + if (!assert_mutable_array(vm, arr)) return NULL; for (i = 1; i < nargs; i++) { @@ -755,6 +778,9 @@ uc_reverse(uc_vm_t *vm, size_t nargs) char *dup, *p; if (ucv_type(obj) == UC_ARRAY) { + if (!assert_mutable_array(vm, obj)) + return NULL; + rv = ucv_array_new(vm); for (arridx = ucv_array_length(obj); arridx > 0; arridx--) @@ -851,7 +877,7 @@ uc_sort(uc_vm_t *vm, size_t nargs) uc_value_t *arr = uc_fn_arg(0); uc_value_t *fn = uc_fn_arg(1); - if (ucv_type(arr) != UC_ARRAY) + if (!assert_mutable_array(vm, arr)) return NULL; sort_ctx.vm = vm; @@ -870,7 +896,7 @@ uc_splice(uc_vm_t *vm, size_t nargs) int64_t remlen = ucv_to_integer(uc_fn_arg(2)); size_t arrlen, addlen, idx; - if (ucv_type(arr) != UC_ARRAY) + if (!assert_mutable_array(vm, arr)) return NULL; arrlen = ucv_array_length(arr); @@ -2352,7 +2378,7 @@ uc_include_common(uc_vm_t *vm, size_t nargs, bool raw_mode) if (!closure) return NULL; - p = include_path(closure->function->program->source->runpath, ucv_string_get(path)); + p = include_path(uc_program_function_source(closure->function)->runpath, ucv_string_get(path)); if (!p) { uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, @@ -2870,7 +2896,7 @@ uc_sourcepath(uc_vm_t *vm, size_t nargs) continue; } - path = realpath(frame->closure->function->program->source->runpath, NULL); + path = realpath(uc_program_function_source(frame->closure->function)->runpath, NULL); break; } @@ -387,14 +387,13 @@ parse_define_string(char *opt, uc_value_t *globals) } static void -parse_search_path(char *pattern, uc_value_t *globals) +parse_search_path(char *pattern, uc_parse_config_t *config) { - uc_value_t *rsp = ucv_object_get(globals, "REQUIRE_SEARCH_PATH", NULL); size_t len; char *p; if (strchr(pattern, '*')) { - ucv_array_push(rsp, ucv_string_new(pattern)); + uc_search_path_add(&config->module_search_path, pattern); return; } @@ -407,11 +406,11 @@ parse_search_path(char *pattern, uc_value_t *globals) pattern[--len] = 0; xasprintf(&p, "%s/*.so", pattern); - ucv_array_push(rsp, ucv_string_new(p)); + uc_search_path_add(&config->module_search_path, p); free(p); xasprintf(&p, "%s/*.uc", pattern); - ucv_array_push(rsp, ucv_string_new(p)); + uc_search_path_add(&config->module_search_path, p); free(p); } @@ -462,6 +461,7 @@ appname(const char *argv0) int main(int argc, char **argv) { + const char *optspec = "he:tST::RD:F:U:l:L:c::o:s"; char *interp = "/usr/bin/env ucode"; uc_source_t *source = NULL; FILE *precompile = NULL; @@ -480,6 +480,8 @@ main(int argc, char **argv) .raw_mode = true }; + uc_search_path_init(&config.module_search_path); + app = appname(argv[0]); if (argc == 1) { @@ -494,6 +496,31 @@ main(int argc, char **argv) stdin_unused = stdin; + /* parse options iteration 1: parse config related options */ + while ((opt = getopt(argc, argv, optspec)) != -1) + { + switch (opt) { + case 'L': + parse_search_path(optarg, &config); + break; + + case 'S': + config.strict_declarations = true; + break; + + case 'R': + config.raw_mode = true; + break; + + case 'T': + config.raw_mode = false; + parse_template_modeflags(optarg, &config); + break; + } + } + + optind = 1; + uc_vm_init(&vm, &config); /* load std functions into global scope */ @@ -504,8 +531,8 @@ main(int argc, char **argv) ucv_object_add(uc_vm_scope_get(&vm), "ARGV", ucv_get(o)); - /* parse options */ - while ((opt = getopt(argc, argv, "he:tST::RD:F:U:l:L:c::o:s")) != -1) + /* parse options iteration 2: process remaining options */ + while ((opt = getopt(argc, argv, optspec)) != -1) { switch (opt) { case 'h': @@ -520,19 +547,6 @@ main(int argc, char **argv) uc_vm_trace_set(&vm, 1); break; - case 'S': - config.strict_declarations = true; - break; - - case 'R': - config.raw_mode = true; - break; - - case 'T': - config.raw_mode = false; - parse_template_modeflags(optarg, &config); - break; - case 'D': if (!parse_define_string(optarg, uc_vm_scope_get(&vm))) { rv = 1; @@ -553,10 +567,6 @@ main(int argc, char **argv) ucv_object_delete(uc_vm_scope_get(&vm), optarg); break; - case 'L': - parse_search_path(optarg, uc_vm_scope_get(&vm)); - break; - case 'l': if (!parse_library_load(optarg, &vm)) { rv = 1; @@ -629,6 +639,8 @@ main(int argc, char **argv) rv = compile(&vm, source, precompile, strip, interp); out: + uc_search_path_free(&config.module_search_path); + uc_source_put(source); uc_vm_free(&vm); @@ -25,7 +25,7 @@ uc_program_t * -uc_program_new(uc_source_t *source) +uc_program_new(void) { uc_program_t *prog; @@ -37,15 +37,13 @@ uc_program_new(uc_source_t *source) prog->functions.next = &prog->functions; prog->functions.prev = &prog->functions; - prog->source = uc_source_get(source); - uc_vallist_init(&prog->constants); return prog; } uc_function_t * -uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos) +uc_program_function_new(uc_program_t *prog, const char *name, uc_source_t *source, size_t srcpos) { uc_function_t *func; size_t namelen = 0; @@ -58,6 +56,13 @@ uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos) if (name) strcpy(func->name, name); + for (func->srcidx = 0; func->srcidx < prog->sources.count; func->srcidx++) + if (prog->sources.entries[func->srcidx] == source) + break; + + if (func->srcidx >= prog->sources.count) + uc_vector_push(&prog->sources, uc_source_get(source)); + func->nargs = 0; func->nupvals = 0; func->srcpos = srcpos; @@ -97,17 +102,21 @@ uc_program_function_load(uc_program_t *prog, size_t id) return NULL; } +uc_source_t * +uc_program_function_source(uc_function_t *fn) +{ + assert(fn->srcidx < fn->program->sources.count); + + return fn->program->sources.entries[fn->srcidx]; +} + size_t uc_program_function_srcpos(uc_function_t *fn, size_t off) { - size_t pos; - if (!fn) return 0; - pos = uc_chunk_debug_get_srcpos(&fn->chunk, off); - - return pos ? fn->srcpos + pos : 0; + return fn->srcpos + uc_chunk_debug_get_srcpos(&fn->chunk, off); } void @@ -224,7 +233,7 @@ enum { static void write_chunk(uc_chunk_t *chunk, FILE *file, uint32_t flags) { - size_t i; + size_t i, slot; /* write bytecode data */ write_vector(chunk, file); @@ -246,9 +255,14 @@ write_chunk(uc_chunk_t *chunk, FILE *file, uint32_t flags) write_u32(chunk->debuginfo.variables.count, file); for (i = 0; i < chunk->debuginfo.variables.count; i++) { + slot = chunk->debuginfo.variables.entries[i].slot; + + if (slot >= ((size_t)-1 / 2)) + slot = ((uint32_t)-1 / 2) + (slot - ((size_t)-1 / 2)); + write_u32(chunk->debuginfo.variables.entries[i].from, file); write_u32(chunk->debuginfo.variables.entries[i].to, file); - write_u32(chunk->debuginfo.variables.entries[i].slot, file); + write_u32(slot, file); write_u32(chunk->debuginfo.variables.entries[i].nameidx, file); } @@ -293,6 +307,7 @@ write_function(uc_function_t *func, FILE *file, bool debug) write_u16(func->nargs, file); write_u16(func->nupvals, file); + write_u32(func->srcidx, file); write_u32(func->srcpos, file); write_chunk(&func->chunk, file, flags); @@ -307,33 +322,38 @@ uc_program_write(uc_program_t *prog, FILE *file, bool debug) if (debug) flags |= UC_PROGRAM_F_DEBUG; - if (debug && prog->source) { + if (debug && prog->sources.count) flags |= UC_PROGRAM_F_SOURCEINFO; - if (prog->source->buffer) - flags |= UC_PROGRAM_F_SOURCEBUF; - } - /* magic word + flags */ write_u32(UC_PRECOMPILED_BYTECODE_MAGIC, file); write_u32(flags, file); + /* write source information */ if (flags & UC_PROGRAM_F_SOURCEINFO) { - /* write source file name */ - write_string(prog->source->filename, file); + write_u32(prog->sources.count, file); + + for (i = 0; i < prog->sources.count; i++) { + /* write source file name */ + write_string(prog->sources.entries[i]->filename, file); - /* include source buffer if program was compiled from stdin */ - if (flags & UC_PROGRAM_F_SOURCEBUF) - write_string(prog->source->buffer, file); + /* include source buffer if program was compiled from stdin */ + if (prog->sources.entries[i]->buffer) + write_string(prog->sources.entries[i]->buffer, file); + else + //write_string("", file); + write_u32(0, file); - /* write lineinfo data */ - write_vector(&prog->source->lineinfo, file); + /* write lineinfo data */ + write_vector(&prog->sources.entries[i]->lineinfo, file); + } } /* write constants */ write_vallist(&prog->constants, file); /* write program sections */ + i = 0; uc_program_function_foreach(prog, fn1) { (void)fn1; i++; @@ -541,57 +561,73 @@ out: } static uc_source_t * -read_sourceinfo(uc_source_t *input, uint32_t flags, char **errp) +read_sourceinfo(uc_source_t *input, uint32_t flags, char **errp, uc_program_t *program) { char *path = NULL, *code = NULL; uc_source_t *source = NULL; - size_t len; + size_t len, count; if (flags & UC_PROGRAM_F_SOURCEINFO) { - if (!read_size_t(input->fp, &len, sizeof(uint32_t), "sourceinfo filename length", errp)) - goto out; + if (!read_size_t(input->fp, &count, sizeof(uint32_t), "amount of source entries", errp)) + return NULL; - path = xalloc(len); + while (count > 0) { + if (!read_size_t(input->fp, &len, sizeof(uint32_t), "sourceinfo filename length", errp)) + return NULL; - if (!read_string(input->fp, path, len, "sourceinfo filename", errp)) - goto out; + path = xalloc(len); + + if (!read_string(input->fp, path, len, "sourceinfo filename", errp)) { + free(path); + + return NULL; + } - if (flags & UC_PROGRAM_F_SOURCEBUF) { if (!read_size_t(input->fp, &len, sizeof(uint32_t), "sourceinfo code buffer length", errp)) - goto out; + return NULL; - code = xalloc(len); + if (len > 0) { + code = xalloc(len); - if (!read_string(input->fp, code, len, "sourceinfo code buffer data", errp)) { - free(code); - goto out; + if (!read_string(input->fp, code, len, "sourceinfo code buffer data", errp)) { + free(code); + free(path); + + return NULL; + } + + source = uc_source_new_buffer(path, code, len); } + else { + source = uc_source_new_file(path); - source = uc_source_new_buffer(path, code, len); - } - else { - source = uc_source_new_file(path); + if (!source) { + fprintf(stderr, "Unable to open source file %s: %s\n", path, strerror(errno)); + source = uc_source_new_buffer(path, xstrdup(""), 0); + } + } + + if (!read_vector(input->fp, &source->lineinfo, "sourceinfo lineinfo", errp)) { + uc_source_put(source); + free(path); - if (!source) { - fprintf(stderr, "Unable to open source file %s: %s\n", path, strerror(errno)); - source = uc_source_new_buffer(path, xstrdup(""), 0); + return NULL; } - } - if (!read_vector(input->fp, &source->lineinfo, "sourceinfo lineinfo", errp)) { - uc_source_put(source); - source = NULL; - goto out; + uc_source_runpath_set(source, input->runpath); + uc_vector_push(&program->sources, source); + + free(path); + + count--; } } else { source = uc_source_new_buffer("[no source]", xstrdup(""), 0); - } - uc_source_runpath_set(source, input->runpath); - -out: - free(path); + uc_source_runpath_set(source, input->runpath); + uc_vector_push(&program->sources, source); + } return source; } @@ -657,6 +693,9 @@ read_chunk(FILE *file, uc_chunk_t *chunk, uint32_t flags, const char *subj, char !read_size_t(file, &varrange->slot, sizeof(uint32_t), subjbuf, errp) || !read_size_t(file, &varrange->nameidx, sizeof(uint32_t), subjbuf, errp)) goto out; + + if (varrange->slot >= ((uint32_t)-1 / 2)) + varrange->slot = ((size_t)-1 / 2) + (varrange->slot - ((uint32_t)-1 / 2)); } snprintf(subjbuf, sizeof(subjbuf), "%s variable names", subj); @@ -697,8 +736,10 @@ out: static bool read_function(FILE *file, uc_program_t *program, size_t idx, char **errp) { + size_t nargs, nupvals, srcidx, srcpos; char subjbuf[64], *name = NULL; uc_function_t *func = NULL; + uc_source_t *source; uint32_t flags, u32; snprintf(subjbuf, sizeof(subjbuf), "function #%zu flags", idx); @@ -722,15 +763,25 @@ read_function(FILE *file, uc_program_t *program, size_t idx, char **errp) snprintf(subjbuf, sizeof(subjbuf), "function #%zu (%s) arg count and offset", idx, name ? name : "-"); - func = (uc_function_t *)uc_program_function_new(program, name, 0); + if (!read_size_t(file, &nargs, sizeof(uint16_t), subjbuf, errp) || + !read_size_t(file, &nupvals, sizeof(uint16_t), subjbuf, errp) || + !read_size_t(file, &srcidx, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &srcpos, sizeof(uint32_t), subjbuf, errp)) { + goto out; + } + + // FIXME + if (srcidx < program->sources.count) + source = program->sources.entries[srcidx]; + else + source = program->sources.entries[0]; + + func = (uc_function_t *)uc_program_function_new(program, name, source, srcpos); func->arrow = (flags & UC_FUNCTION_F_IS_ARROW); func->vararg = (flags & UC_FUNCTION_F_IS_VARARG); func->strict = (flags & UC_FUNCTION_F_IS_STRICT); - - if (!read_size_t(file, &func->nargs, sizeof(uint16_t), subjbuf, errp) || - !read_size_t(file, &func->nupvals, sizeof(uint16_t), subjbuf, errp) || - !read_size_t(file, &func->srcpos, sizeof(uint32_t), subjbuf, errp)) - goto out; + func->nargs = nargs; + func->nupvals = nupvals; snprintf(subjbuf, sizeof(subjbuf), "function #%zu (%s) body", idx, name ? name : "-"); @@ -751,7 +802,6 @@ uc_program_t * uc_program_load(uc_source_t *input, char **errp) { uc_program_t *program = NULL; - uc_source_t *source = NULL; uint32_t flags, nfuncs, i; if (!read_u32(input->fp, &i, "file magic", errp)) @@ -765,15 +815,11 @@ uc_program_load(uc_source_t *input, char **errp) if (!read_u32(input->fp, &flags, "program flags", errp)) goto out; - source = read_sourceinfo(input, flags, errp); + program = uc_program_new(); - if (!source) + if (!read_sourceinfo(input, flags, errp, program)) goto out; - program = uc_program_new(source); - - uc_source_put(source); - if (!read_vallist(input->fp, &program->constants, "constants", errp)) goto out; @@ -800,3 +846,24 @@ uc_program_entry(uc_program_t *program) return (uc_function_t *)program->functions.prev; } + +ssize_t +uc_program_export_lookup(uc_program_t *program, uc_source_t *source, uc_value_t *name) +{ + size_t i, off; + ssize_t slot; + + for (i = 0, off = 0; i < program->sources.count; i++) { + if (program->sources.entries[i] != source) { + off += program->sources.entries[i]->exports.count; + continue; + } + + slot = uc_source_export_lookup(source, name); + + if (slot > -1) + return off + slot; + } + + return -1; +} @@ -196,3 +196,28 @@ uc_source_runpath_set(uc_source_t *source, const char *runpath) source->runpath = xstrdup(runpath); } + +bool +uc_source_export_add(uc_source_t *source, uc_value_t *name) +{ + ssize_t idx = uc_source_export_lookup(source, name); + + if (idx > -1) + return false; + + uc_vector_push(&source->exports, ucv_get(name)); + + return true; +} + +ssize_t +uc_source_export_lookup(uc_source_t *source, uc_value_t *name) +{ + size_t i; + + for (i = 0; i < source->exports.count; i++) + if (ucv_is_equal(source->exports.entries[i], name)) + return i; + + return -1; +} diff --git a/tests/custom/04_modules/01_export_variable_declaration b/tests/custom/04_modules/01_export_variable_declaration new file mode 100644 index 0000000..19a1c11 --- /dev/null +++ b/tests/custom/04_modules/01_export_variable_declaration @@ -0,0 +1,29 @@ +Variable declarations can be prepended with `export` to automatically +export each variable using the same name as the variable itself. + +Updates to the variable after the export are reflected properly in +the including scope. + +-- File test-var-decl.uc -- +export let a, b, c; +export let d = 4, e = 5, f = 6; +export const g = 7, h = 8, i = 9; + +a = 1; +b = 2; +c = 3; +-- End -- + +-- Testcase -- +import { a, b, c, d, e, f, g, h, i } from "./files/test-var-decl.uc"; + +print([ a, b, c, d, e, f, g, h, i ], "\n"); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 1, 2, 3, 4, 5, 6, 7, 8, 9 ] +-- End -- diff --git a/tests/custom/04_modules/02_export_function_declaration b/tests/custom/04_modules/02_export_function_declaration new file mode 100644 index 0000000..4067da9 --- /dev/null +++ b/tests/custom/04_modules/02_export_function_declaration @@ -0,0 +1,22 @@ +A named function declaration can be prepended with `export` to +automatically export the function. + +-- File test-func-decl.uc -- +export function func() { + print("Hello, world!\n"); +}; +-- End -- + +-- Testcase -- +import { func } from "./files/test-func-decl.uc"; + +func(); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +Hello, world! +-- End -- diff --git a/tests/custom/04_modules/03_export_list b/tests/custom/04_modules/03_export_list new file mode 100644 index 0000000..8f93f08 --- /dev/null +++ b/tests/custom/04_modules/03_export_list @@ -0,0 +1,27 @@ +Already declared local variables and functions may be exported using the +curly brace export list syntax. + +-- File test-var-decl.uc -- +let testvar = 123; +const testconst = "Test"; + +function testfunc() { + print("Hello, world!\n"); +} + +export { testvar, testconst, testfunc }; +-- End -- + +-- Testcase -- +import { testvar, testconst, testfunc } from "./files/test-var-decl.uc"; + +print([ testvar, testconst, testfunc ], "\n"); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 123, "Test", "function testfunc() { ... }" ] +-- End -- diff --git a/tests/custom/04_modules/04_export_rename b/tests/custom/04_modules/04_export_rename new file mode 100644 index 0000000..49057fd --- /dev/null +++ b/tests/custom/04_modules/04_export_rename @@ -0,0 +1,28 @@ +By using the `as` keyword, exports may be renamed when using the export +list syntax. It is also possible to specify string aliases which are not +valid variable names, in this case a rename on import is mandatory. + +-- File test.uc -- +let testvar = 123; +const testconst = "Test"; + +function testfunc() { + print("Hello, world!\n"); +} + +export { testvar as modvar, testconst as 'define', testfunc as "module-function" }; +-- End -- + +-- Testcase -- +import { modvar, define, "module-function" as func } from "./files/test.uc"; + +print([ modvar, define, func ], "\n"); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 123, "Test", "function testfunc() { ... }" ] +-- End -- diff --git a/tests/custom/04_modules/05_export_default b/tests/custom/04_modules/05_export_default new file mode 100644 index 0000000..a4c8a43 --- /dev/null +++ b/tests/custom/04_modules/05_export_default @@ -0,0 +1,38 @@ +The `export default` statement can be used to declare a default export +value for a module. The value for `export default` can be an arbitrary +expression, it must not refer to a local variable. + +When using the export list syntax, the alias "default" can be used to +designate the default export. + +-- File test-default-expr.uc -- +export default 7 * 21; +-- End -- + +-- File test-default-func.uc -- +export default function() { + return "Hello, world!"; +}; +-- End -- + +-- File test-default-alias.uc -- +let a = 1, b = 2, c = 3; + +export { a, b as default, c }; +-- End -- + +-- Testcase -- +import def1 from "./files/test-default-expr.uc"; +import def2 from "./files/test-default-func.uc"; +import def3 from "./files/test-default-alias.uc"; + +print([ def1, def2(), def3 ], "\n"); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 147, "Hello, world!", 2 ] +-- End -- diff --git a/tests/custom/04_modules/06_export_errors b/tests/custom/04_modules/06_export_errors new file mode 100644 index 0000000..c02a547 --- /dev/null +++ b/tests/custom/04_modules/06_export_errors @@ -0,0 +1,89 @@ +Export statements are only allowed at the toplevel of a module. + +-- Testcase -- +export let x = 1; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Exports may only appear at top level of a module + + `export let x = 1;` + ^-- Near here + + +-- End -- + + +Export statements are not allowed within functions or nested blocks. + +-- Testcase -- +import "./files/test.uc"; +-- End -- + +-- File test.uc -- +{ + export let x = 1; +} +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Unable to compile module './files/test.uc': +Syntax error: Exports may only appear at top level of a module +In line 2, byte 2: + + ` export let x = 1;` + ^-- Near here + + + +In line 1, byte 25: + + `import "./files/test.uc";` + Near here --------------^ + + +-- End -- + + +Duplicate export names should result in an error. + +-- Testcase -- +import "./files/test-duplicate.uc"; +-- End -- + +-- File test-duplicate.uc -- +let x = 1, y = 2; + +export { x }; +export { y as x }; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Unable to compile module './files/test-duplicate.uc': +Syntax error: Duplicate export 'x' for module './files/test-duplicate.uc' +In line 4, byte 15: + + `export { y as x };` + Near here ----^ + + + +In line 1, byte 35: + + `import "./files/test-duplicate.uc";` + Near here ------------------------^ + + +-- End -- diff --git a/tests/custom/04_modules/07_import_default b/tests/custom/04_modules/07_import_default new file mode 100644 index 0000000..7190a22 --- /dev/null +++ b/tests/custom/04_modules/07_import_default @@ -0,0 +1,99 @@ +An `import` statement with a sole label will import the modules default +export and bind it to a local variable named after the label. + +-- Testcase -- +import defVal from "./files/test1.uc"; + +print(defVal, "\n"); +-- End -- + +-- File test1.uc -- +export default "This is the default export"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +This is the default export +-- End -- + + +Attemping to import a default export from a module without default +export will raise an error. + +-- Testcase -- +import defVal from "./files/test2.uc"; + +print(defVal, "\n"); +-- End -- + +-- File test2.uc -- +export const x = "This is a non-default export"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Module ./files/test2.uc has no default export +In line 1, byte 20: + + `import defVal from "./files/test2.uc";` + Near here ---------^ + + +-- End -- + + +In import statements usign the list syntax, the `default` keyword can be +used to refer to default exports. + +-- Testcase -- +import { default as defVal } from "./files/test3.uc"; + +print(defVal, "\n"); +-- End -- + +-- File test3.uc -- +export default "This is the default export"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +This is the default export +-- End -- + + +When using the default keyword within the list syntax, the `as` keyword is +mandatory to assign a non-reserved keyword as name. + +-- Testcase -- +import { default } from "./files/test4.uc"; + +print(defVal, "\n"); +-- End -- + +-- File test4.uc -- +export default "This is the default export"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Unexpected token +Expecting 'as' +In line 1, byte 18: + + `import { default } from "./files/test4.uc";` + Near here -------^ + + +-- End -- diff --git a/tests/custom/04_modules/08_import_list b/tests/custom/04_modules/08_import_list new file mode 100644 index 0000000..1a4f116 --- /dev/null +++ b/tests/custom/04_modules/08_import_list @@ -0,0 +1,105 @@ +An `import` statement followed by a curly brace enclosed list of names +will import the corresponding exports from the module. + +-- Testcase -- +import { a, b, c } from "./files/test1.uc"; + +print([ a, b, c ], "\n"); +-- End -- + +-- File test1.uc -- +export const a = 1, b = 2, c = 3; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 1, 2, 3 ] +-- End -- + + +Attemping to import a not exported name will raise an error. + +-- Testcase -- +import y from "./files/test2.uc"; + +print(y, "\n"); +-- End -- + +-- File test2.uc -- +export const x = "This is a test"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Module ./files/test2.uc has no default export +In line 1, byte 15: + + `import y from "./files/test2.uc";` + Near here ----^ + + +-- End -- + + +Imports may be renamed to assign an alternative local name to the +exported module symbols. Renaming is also required for string export +names which are no valid variable identifiers. + +-- Testcase -- +import { a as var1, bool as var2, "my function" as var3 } from "./files/test3.uc"; + +print([ var1, var2, var3 ], "\n"); +-- End -- + +-- File test3.uc -- +const a = "A string"; + +let b = 123; + +function c() { + return "A function" +} + +export { + a, + b as bool, + c as "my function" +}; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ "A string", 123, "function c() { ... }" ] +-- End -- + + +A list expression may follow a default import expression in an `import` +statment. + +-- Testcase -- +import defVal, { a as x, b as y, c as z } from "./files/test4.uc"; + +print([defVal, x, y, z], "\n"); +-- End -- + +-- File test4.uc -- +export const a = 1, b = 2, c = 3; +export default a + b + c; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 6, 1, 2, 3 ] +-- End -- diff --git a/tests/custom/04_modules/09_import_wildcard b/tests/custom/04_modules/09_import_wildcard new file mode 100644 index 0000000..aa3dc82 --- /dev/null +++ b/tests/custom/04_modules/09_import_wildcard @@ -0,0 +1,73 @@ +By specifying `*` instead of a label or an import list after an `import` +keyword, all of the modules exports are aggregated into an object whose +keys and values refer to the exported names and their corresponding +values respectively. + +-- Testcase -- +import * as mod from "./files/test1.uc"; + +print(mod, "\n"); +-- End -- + +-- File test1.uc -- +export const a = 1, b = 2, c = 3; +export default a + b + c; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +{ "a": 1, "b": 2, "c": 3, "default": 6 } +-- End -- + + +When using the wildcard import syntax, assigning a name using the `as` +expression is mandatory. + +-- Testcase -- +import * from "./files/test2.uc"; +-- End -- + +-- File test2.uc -- +export const x = "This is a test"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Unexpected token +Expecting 'as' +In line 1, byte 10: + + `import * from "./files/test2.uc";` + ^-- Near here + + +-- End -- + + +A wildcard expression may follow a default import expression in an `import` +statment. + +-- Testcase -- +import defVal, * as mod from "./files/test3.uc"; + +print([defVal, mod], "\n"); +-- End -- + +-- File test3.uc -- +export const a = 1, b = 2, c = 3; +export default a + b + c; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 6, { "a": 1, "b": 2, "c": 3, "default": 6 } ] +-- End -- diff --git a/tests/custom/04_modules/10_import_none b/tests/custom/04_modules/10_import_none new file mode 100644 index 0000000..be30106 --- /dev/null +++ b/tests/custom/04_modules/10_import_none @@ -0,0 +1,18 @@ +An `import` statement may omit a default name, wildcard expression or name +lsit entirely to execute a module code solely for its side effects. + +-- Testcase -- +import "./files/test.uc"; +-- End -- + +-- File test.uc -- +print("This is the test module running\n"); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +This is the test module running +-- End -- diff --git a/tests/custom/04_modules/11_import_many_exec_once b/tests/custom/04_modules/11_import_many_exec_once new file mode 100644 index 0000000..f469c7f --- /dev/null +++ b/tests/custom/04_modules/11_import_many_exec_once @@ -0,0 +1,28 @@ +When multiple imports refer to the same module, the module will only be +executed once. The equivalence of module paths is tested after canonicalizing +the requested path. + +-- Testcase -- +import { counter as counter1 } from "./files/test/example.uc"; +import { counter as counter2 } from "files/test/example.uc"; +import { counter as counter3 } from "test.example"; + +print([ counter1, counter2, counter3 ], "\n"); +-- End -- + +-- File test/example.uc -- +print("This is the test module running\n"); + +export let counter = 0; + +counter++; +-- End -- + +-- Args -- +-R -L ./files +-- End -- + +-- Expect stdout -- +This is the test module running +[ 1, 1, 1 ] +-- End -- diff --git a/tests/custom/04_modules/12_import_immutability b/tests/custom/04_modules/12_import_immutability new file mode 100644 index 0000000..37c0bc6 --- /dev/null +++ b/tests/custom/04_modules/12_import_immutability @@ -0,0 +1,52 @@ +Module imports are read-only bindings to the exported module variables. + +-- Testcase -- +import { a } from "./files/test.uc"; + +a = 2; +-- End -- + +-- File test.uc -- +export let a = 1; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Invalid assignment to constant 'a' +In line 3, byte 5: + + `a = 2;` + ^-- Near here + + +-- End -- + + +Aggregated module objects are read-only as well. + +-- Testcase -- +import * as mod from "./files/test.uc"; + +mod.a = 2; +-- End -- + +-- File test.uc -- +export let a = 1; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Type error: object value is immutable +In line 3, byte 9: + + `mod.a = 2;` + ^-- Near here + + +-- End -- diff --git a/tests/custom/04_modules/13_import_liveness b/tests/custom/04_modules/13_import_liveness new file mode 100644 index 0000000..ca7ff35 --- /dev/null +++ b/tests/custom/04_modules/13_import_liveness @@ -0,0 +1,29 @@ +Imported bindings to exported module variables are live, they'll reflect +every change to the exported variable values. + +-- Testcase -- +import { counter, count } from "./files/test.uc"; + +print(counter, "\n"); +count(); +print(counter, "\n"); +-- End -- + +-- File test.uc -- +let counter = 1; + +function count() { + counter++; +} + +export { counter, count }; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +1 +2 +-- End -- diff --git a/tests/custom/04_bugs/01_try_catch_stack_mismatch b/tests/custom/99_bugs/01_try_catch_stack_mismatch index f6e5a0a..f6e5a0a 100644 --- a/tests/custom/04_bugs/01_try_catch_stack_mismatch +++ b/tests/custom/99_bugs/01_try_catch_stack_mismatch diff --git a/tests/custom/04_bugs/02_array_pop_use_after_free b/tests/custom/99_bugs/02_array_pop_use_after_free index 22f63ff..22f63ff 100644 --- a/tests/custom/04_bugs/02_array_pop_use_after_free +++ b/tests/custom/99_bugs/02_array_pop_use_after_free diff --git a/tests/custom/04_bugs/03_switch_fallthrough_miscompilation b/tests/custom/99_bugs/03_switch_fallthrough_miscompilation index 3e6410e..3e6410e 100644 --- a/tests/custom/04_bugs/03_switch_fallthrough_miscompilation +++ b/tests/custom/99_bugs/03_switch_fallthrough_miscompilation diff --git a/tests/custom/04_bugs/04_property_set_abort b/tests/custom/99_bugs/04_property_set_abort index 8af477f..8af477f 100644 --- a/tests/custom/04_bugs/04_property_set_abort +++ b/tests/custom/99_bugs/04_property_set_abort diff --git a/tests/custom/04_bugs/05_duplicate_resource_type b/tests/custom/99_bugs/05_duplicate_resource_type index 6d8d8f5..6d8d8f5 100644 --- a/tests/custom/04_bugs/05_duplicate_resource_type +++ b/tests/custom/99_bugs/05_duplicate_resource_type diff --git a/tests/custom/04_bugs/06_lexer_escape_at_boundary b/tests/custom/99_bugs/06_lexer_escape_at_boundary index e80b0a1..e80b0a1 100644 --- a/tests/custom/04_bugs/06_lexer_escape_at_boundary +++ b/tests/custom/99_bugs/06_lexer_escape_at_boundary diff --git a/tests/custom/04_bugs/07_lexer_overlong_lines b/tests/custom/99_bugs/07_lexer_overlong_lines index d2dd3be..d2dd3be 100644 --- a/tests/custom/04_bugs/07_lexer_overlong_lines +++ b/tests/custom/99_bugs/07_lexer_overlong_lines diff --git a/tests/custom/04_bugs/08_compiler_arrow_fn_expressions b/tests/custom/99_bugs/08_compiler_arrow_fn_expressions index 5cd8960..5cd8960 100644 --- a/tests/custom/04_bugs/08_compiler_arrow_fn_expressions +++ b/tests/custom/99_bugs/08_compiler_arrow_fn_expressions diff --git a/tests/custom/04_bugs/09_reject_invalid_array_indexes b/tests/custom/99_bugs/09_reject_invalid_array_indexes index a7e5272..a7e5272 100644 --- a/tests/custom/04_bugs/09_reject_invalid_array_indexes +++ b/tests/custom/99_bugs/09_reject_invalid_array_indexes diff --git a/tests/custom/04_bugs/10_break_stack_mismatch b/tests/custom/99_bugs/10_break_stack_mismatch index c9c82c5..c9c82c5 100644 --- a/tests/custom/04_bugs/10_break_stack_mismatch +++ b/tests/custom/99_bugs/10_break_stack_mismatch diff --git a/tests/custom/04_bugs/11_switch_stack_mismatch b/tests/custom/99_bugs/11_switch_stack_mismatch index 0cf82f0..0cf82f0 100644 --- a/tests/custom/04_bugs/11_switch_stack_mismatch +++ b/tests/custom/99_bugs/11_switch_stack_mismatch diff --git a/tests/custom/04_bugs/12_altblock_stack_mismatch b/tests/custom/99_bugs/12_altblock_stack_mismatch index e350660..e350660 100644 --- a/tests/custom/04_bugs/12_altblock_stack_mismatch +++ b/tests/custom/99_bugs/12_altblock_stack_mismatch diff --git a/tests/custom/04_bugs/13_split_by_string_leading_trailing b/tests/custom/99_bugs/13_split_by_string_leading_trailing index 10a6062..10a6062 100644 --- a/tests/custom/04_bugs/13_split_by_string_leading_trailing +++ b/tests/custom/99_bugs/13_split_by_string_leading_trailing diff --git a/tests/custom/04_bugs/14_incomplete_expression_at_eof b/tests/custom/99_bugs/14_incomplete_expression_at_eof index 474e87c..474e87c 100644 --- a/tests/custom/04_bugs/14_incomplete_expression_at_eof +++ b/tests/custom/99_bugs/14_incomplete_expression_at_eof diff --git a/tests/custom/04_bugs/15_segfault_on_prefix_increment b/tests/custom/99_bugs/15_segfault_on_prefix_increment index 280b680..280b680 100644 --- a/tests/custom/04_bugs/15_segfault_on_prefix_increment +++ b/tests/custom/99_bugs/15_segfault_on_prefix_increment diff --git a/tests/custom/04_bugs/16_hang_on_regexp_at_eof b/tests/custom/99_bugs/16_hang_on_regexp_at_eof index d8702ca..d8702ca 100644 --- a/tests/custom/04_bugs/16_hang_on_regexp_at_eof +++ b/tests/custom/99_bugs/16_hang_on_regexp_at_eof diff --git a/tests/custom/04_bugs/17_hang_on_unclosed_expression_block b/tests/custom/99_bugs/17_hang_on_unclosed_expression_block index 25128bb..29553ab 100644 --- a/tests/custom/04_bugs/17_hang_on_unclosed_expression_block +++ b/tests/custom/99_bugs/17_hang_on_unclosed_expression_block @@ -3,7 +3,7 @@ infinite loop. -- Expect stderr -- Syntax error: Unterminated template block -In line 1, byte 6: +In line 1, byte 5: `{{ 1` ^-- Near here diff --git a/tests/custom/04_bugs/18_hang_on_line_comments_at_eof b/tests/custom/99_bugs/18_hang_on_line_comments_at_eof index 957ed47..5fc811e 100644 --- a/tests/custom/04_bugs/18_hang_on_line_comments_at_eof +++ b/tests/custom/99_bugs/18_hang_on_line_comments_at_eof @@ -2,7 +2,7 @@ When parsing a comment near EOF, or a comment escaping the end of an expression block, the lexer did end up in an infinite loop. -- Expect stderr -- -Syntax error: Expecting expression +Syntax error: Unterminated template block In line 1, byte 9: `{{ // }}` diff --git a/tests/custom/04_bugs/19_truncated_format_string b/tests/custom/99_bugs/19_truncated_format_string index ead0fdb..ead0fdb 100644 --- a/tests/custom/04_bugs/19_truncated_format_string +++ b/tests/custom/99_bugs/19_truncated_format_string diff --git a/tests/custom/04_bugs/20_use_strict_stack_mismatch b/tests/custom/99_bugs/20_use_strict_stack_mismatch index 7294d23..7294d23 100644 --- a/tests/custom/04_bugs/20_use_strict_stack_mismatch +++ b/tests/custom/99_bugs/20_use_strict_stack_mismatch diff --git a/tests/custom/04_bugs/21_compiler_parenthesized_prop_keyword b/tests/custom/99_bugs/21_compiler_parenthesized_prop_keyword index 472b2af..472b2af 100644 --- a/tests/custom/04_bugs/21_compiler_parenthesized_prop_keyword +++ b/tests/custom/99_bugs/21_compiler_parenthesized_prop_keyword diff --git a/tests/custom/04_bugs/22_compiler_break_continue_scoping b/tests/custom/99_bugs/22_compiler_break_continue_scoping index 461b144..461b144 100644 --- a/tests/custom/04_bugs/22_compiler_break_continue_scoping +++ b/tests/custom/99_bugs/22_compiler_break_continue_scoping diff --git a/tests/custom/04_bugs/23_compiler_parenthesized_division b/tests/custom/99_bugs/23_compiler_parenthesized_division index a70703f..a70703f 100644 --- a/tests/custom/04_bugs/23_compiler_parenthesized_division +++ b/tests/custom/99_bugs/23_compiler_parenthesized_division diff --git a/tests/custom/04_bugs/24_compiler_local_for_loop_declaration b/tests/custom/99_bugs/24_compiler_local_for_loop_declaration index aafde55..aafde55 100644 --- a/tests/custom/04_bugs/24_compiler_local_for_loop_declaration +++ b/tests/custom/99_bugs/24_compiler_local_for_loop_declaration diff --git a/tests/custom/04_bugs/25_lexer_shifted_offsets b/tests/custom/99_bugs/25_lexer_shifted_offsets index db10121..db10121 100644 --- a/tests/custom/04_bugs/25_lexer_shifted_offsets +++ b/tests/custom/99_bugs/25_lexer_shifted_offsets diff --git a/tests/custom/04_bugs/26_compiler_jmp_to_zero b/tests/custom/99_bugs/26_compiler_jmp_to_zero index e7e0127..e7e0127 100644 --- a/tests/custom/04_bugs/26_compiler_jmp_to_zero +++ b/tests/custom/99_bugs/26_compiler_jmp_to_zero diff --git a/tests/custom/04_bugs/27_invalid_sparse_array_set b/tests/custom/99_bugs/27_invalid_sparse_array_set index 4c47039..4c47039 100644 --- a/tests/custom/04_bugs/27_invalid_sparse_array_set +++ b/tests/custom/99_bugs/27_invalid_sparse_array_set diff --git a/tests/custom/04_bugs/28_null_equality b/tests/custom/99_bugs/28_null_equality index b71a3b1..b71a3b1 100644 --- a/tests/custom/04_bugs/28_null_equality +++ b/tests/custom/99_bugs/28_null_equality diff --git a/tests/custom/04_bugs/29_empty_string_as_number b/tests/custom/99_bugs/29_empty_string_as_number index 51a93b2..51a93b2 100644 --- a/tests/custom/04_bugs/29_empty_string_as_number +++ b/tests/custom/99_bugs/29_empty_string_as_number diff --git a/tests/custom/04_bugs/30_nan_strict_equality b/tests/custom/99_bugs/30_nan_strict_equality index 4ec32e2..4ec32e2 100644 --- a/tests/custom/04_bugs/30_nan_strict_equality +++ b/tests/custom/99_bugs/30_nan_strict_equality diff --git a/tests/custom/04_bugs/31_vallist_8bit_shortstrings b/tests/custom/99_bugs/31_vallist_8bit_shortstrings index 9d02f42..9d02f42 100644 --- a/tests/custom/04_bugs/31_vallist_8bit_shortstrings +++ b/tests/custom/99_bugs/31_vallist_8bit_shortstrings diff --git a/tests/custom/04_bugs/32_compiler_switch_patchlist_corruption b/tests/custom/99_bugs/32_compiler_switch_patchlist_corruption index d256de5..d256de5 100644 --- a/tests/custom/04_bugs/32_compiler_switch_patchlist_corruption +++ b/tests/custom/99_bugs/32_compiler_switch_patchlist_corruption diff --git a/tests/custom/04_bugs/33_vm_computed_prop_decl_crash b/tests/custom/99_bugs/33_vm_computed_prop_decl_crash index 60b276c..60b276c 100644 --- a/tests/custom/04_bugs/33_vm_computed_prop_decl_crash +++ b/tests/custom/99_bugs/33_vm_computed_prop_decl_crash diff --git a/tests/custom/04_bugs/34_dirname_off_by_one b/tests/custom/99_bugs/34_dirname_off_by_one index 34ef7c7..34ef7c7 100644 --- a/tests/custom/04_bugs/34_dirname_off_by_one +++ b/tests/custom/99_bugs/34_dirname_off_by_one diff --git a/tests/custom/04_bugs/35_vm_callframe_double_free b/tests/custom/99_bugs/35_vm_callframe_double_free index bb816eb..bb816eb 100644 --- a/tests/custom/04_bugs/35_vm_callframe_double_free +++ b/tests/custom/99_bugs/35_vm_callframe_double_free diff --git a/tests/custom/04_bugs/36_vm_nested_call_return b/tests/custom/99_bugs/36_vm_nested_call_return index 6a52b78..6a52b78 100644 --- a/tests/custom/04_bugs/36_vm_nested_call_return +++ b/tests/custom/99_bugs/36_vm_nested_call_return diff --git a/tests/custom/04_bugs/37_compiler_unexpected_unary_op b/tests/custom/99_bugs/37_compiler_unexpected_unary_op index e652319..e652319 100644 --- a/tests/custom/04_bugs/37_compiler_unexpected_unary_op +++ b/tests/custom/99_bugs/37_compiler_unexpected_unary_op diff --git a/tests/custom/04_bugs/38_index_segfault b/tests/custom/99_bugs/38_index_segfault index e29b99f..e29b99f 100644 --- a/tests/custom/04_bugs/38_index_segfault +++ b/tests/custom/99_bugs/38_index_segfault diff --git a/tests/custom/04_bugs/39_compiler_switch_continue_mismatch b/tests/custom/99_bugs/39_compiler_switch_continue_mismatch index c9b9ec6..c9b9ec6 100644 --- a/tests/custom/04_bugs/39_compiler_switch_continue_mismatch +++ b/tests/custom/99_bugs/39_compiler_switch_continue_mismatch diff --git a/tests/custom/04_bugs/40_lexer_bug_on_lstrip_off b/tests/custom/99_bugs/40_lexer_bug_on_lstrip_off index dc4f8dd..dc4f8dd 100644 --- a/tests/custom/04_bugs/40_lexer_bug_on_lstrip_off +++ b/tests/custom/99_bugs/40_lexer_bug_on_lstrip_off diff --git a/tests/custom/run_tests.sh b/tests/custom/run_tests.sh index fb92379..96ac783 100755 --- a/tests/custom/run_tests.sh +++ b/tests/custom/run_tests.sh @@ -105,6 +105,8 @@ run_testcase() { printf "%d\n" $? > "$dir/res.code" touch "$dir/empty" + sed -i -e "s#$dir#.#g" "$dir/res.out" "$dir/res.err" + if ! cmp -s "$dir/res.err" "${err:-$dir/empty}"; then [ $fail = 0 ] && printf "!\n" printf "Testcase #%d: Expected stderr did not match:\n" $num @@ -30,6 +30,15 @@ #include "ucode/vm.h" #include "ucode/program.h" +static char *uc_default_search_path[] = { LIB_SEARCH_PATH }; + +uc_parse_config_t uc_default_parse_config = { + .module_search_path = { + .count = ARRAY_SIZE(uc_default_search_path), + .entries = uc_default_search_path + } +}; + uc_type_t ucv_type(uc_value_t *uv) { @@ -191,8 +200,8 @@ ucv_gc_mark(uc_value_t *uv) case UC_PROGRAM: program = (uc_program_t *)uv; - if (program->source) - ucv_gc_mark(&program->source->header); + for (i = 0; i < program->sources.count; i++) + ucv_gc_mark(&program->sources.entries[i]->header); break; @@ -283,7 +292,11 @@ ucv_free(uc_value_t *uv, bool retain) uc_program_function_free(func); uc_vallist_free(&program->constants); - ucv_put_value(&program->source->header, retain); + + for (i = 0; i < program->sources.count; i++) + ucv_put_value(&program->sources.entries[i]->header, retain); + + uc_vector_clear(&program->sources); break; case UC_SOURCE: @@ -292,7 +305,11 @@ ucv_free(uc_value_t *uv, bool retain) if (source->runpath != source->filename) free(source->runpath); + for (i = 0; i < source->exports.count; i++) + ucv_put(source->exports.entries[i]); + uc_vector_clear(&source->lineinfo); + uc_vector_clear(&source->exports); fclose(source->fp); free(source->buffer); break; @@ -470,7 +487,7 @@ ucv_int64_new(int64_t n) integer = xalloc(sizeof(*integer)); integer->header.type = UC_INTEGER; integer->header.refcount = 1; - integer->header.u64 = 0; + integer->header.u64_or_constant = 0; integer->i.s64 = n; return &integer->header; @@ -492,7 +509,7 @@ ucv_uint64_new(uint64_t n) integer = xalloc(sizeof(*integer)); integer->header.type = UC_INTEGER; integer->header.refcount = 1; - integer->header.u64 = 1; + integer->header.u64_or_constant = 1; integer->i.u64 = n; return &integer->header; @@ -520,7 +537,7 @@ ucv_uint64_get(uc_value_t *uv) case UC_INTEGER: integer = (uc_integer_t *)uv; - if (integer->header.u64) + if (integer->header.u64_or_constant) return integer->i.u64; if (integer->i.s64 >= 0) @@ -574,10 +591,10 @@ ucv_int64_get(uc_value_t *uv) case UC_INTEGER: integer = (uc_integer_t *)uv; - if (integer->header.u64 && integer->i.u64 <= (uint64_t)INT64_MAX) + if (integer->header.u64_or_constant && integer->i.u64 <= (uint64_t)INT64_MAX) return (int64_t)integer->i.u64; - if (!integer->header.u64) + if (!integer->header.u64_or_constant) return integer->i.s64; errno = ERANGE; @@ -715,7 +732,7 @@ ucv_array_push(uc_value_t *uv, uc_value_t *item) { uc_array_t *array = (uc_array_t *)uv; - if (ucv_type(uv) != UC_ARRAY) + if (ucv_type(uv) != UC_ARRAY || uv->u64_or_constant) return NULL; ucv_array_set(uv, array->count, item); @@ -899,7 +916,7 @@ ucv_object_add(uc_value_t *uv, const char *key, uc_value_t *val) unsigned long hash; void *k; - if (ucv_type(uv) != UC_OBJECT) + if (ucv_type(uv) != UC_OBJECT || uv->u64_or_constant) return false; hash = lh_get_hash(object->table, (const void *)key); @@ -932,7 +949,7 @@ ucv_object_delete(uc_value_t *uv, const char *key) { uc_object_t *object = (uc_object_t *)uv; - if (ucv_type(uv) != UC_OBJECT) + if (ucv_type(uv) != UC_OBJECT || uv->u64_or_constant) return false; return (lh_table_delete(object->table, key) == 0); @@ -1487,6 +1504,7 @@ ucv_to_stringbuf_formatted(uc_vm_t *vm, uc_stringbuf_t *pb, uc_value_t *uv, size uc_closure_t *closure; uc_regexp_t *regexp; uc_value_t *argname; + uc_upvalref_t *ref; uc_array_t *array; size_t i, l; double d; @@ -1686,10 +1704,17 @@ ucv_to_stringbuf_formatted(uc_vm_t *vm, uc_stringbuf_t *pb, uc_value_t *uv, size break; case UC_UPVALUE: - ucv_stringbuf_printf(pb, "%s<upvalref %p>%s", - json ? "\"" : "", - uv, - json ? "\"" : ""); + ref = (uc_upvalref_t *)uv; + + if (ref->closed) + ucv_to_stringbuf_formatted(vm, pb, ref->value, depth, pad_char, pad_size); + else if (vm != NULL && ref->slot < vm->stack.count) + ucv_to_stringbuf_formatted(vm, pb, vm->stack.entries[ref->slot], depth, pad_char, pad_size); + else + ucv_stringbuf_printf(pb, "%s<upvalref %p>%s", + json ? "\"" : "", + uv, + json ? "\"" : ""); break; @@ -2229,3 +2254,12 @@ ucv_freeall(uc_vm_t *vm) { ucv_gc_common(vm, true); } + +void +uc_search_path_init(uc_search_path_t *search_path) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(uc_default_search_path); i++) + uc_vector_push(search_path, xstrdup(uc_default_search_path[i])); +} @@ -73,6 +73,9 @@ static const int8_t insn_operand_bytes[__I_MAX] = { [I_MCALL] = 4, [I_QCALL] = 4, [I_QMCALL] = 4, + + [I_IMPORT] = 4, + [I_EXPORT] = 4 }; static const char *exception_type_strings[] = { @@ -108,7 +111,6 @@ uc_vm_reset_callframes(uc_vm_t *vm) static uc_value_t * uc_vm_alloc_global_scope(uc_vm_t *vm) { - const char *path[] = { LIB_SEARCH_PATH }; uc_value_t *scope, *arr; size_t i; @@ -117,8 +119,8 @@ uc_vm_alloc_global_scope(uc_vm_t *vm) /* build default require() search path */ arr = ucv_array_new(vm); - for (i = 0; i < ARRAY_SIZE(path); i++) - ucv_array_push(arr, ucv_string_new(path[i])); + for (i = 0; i < vm->config->module_search_path.count; i++) + ucv_array_push(arr, ucv_string_new(vm->config->module_search_path.entries[i])); /* register module related constants */ ucv_object_add(scope, "REQUIRE_SEARCH_PATH", arr); @@ -144,7 +146,7 @@ void uc_vm_init(uc_vm_t *vm, uc_parse_config_t *config) vm->exception.type = EXCEPTION_NONE; vm->exception.message = NULL; - vm->config = config; + vm->config = config ? config : &uc_default_parse_config; vm->open_upvals = NULL; @@ -181,6 +183,9 @@ void uc_vm_free(uc_vm_t *vm) for (i = 0; i < vm->restypes.count; i++) ucv_put(vm->restypes.entries[i]->proto); + for (i = 0; i < vm->exports.count; i++) + ucv_put(&vm->exports.entries[i]->header); + uc_vm_reset_callframes(vm); uc_vm_reset_stack(vm); uc_vector_clear(&vm->stack); @@ -194,6 +199,7 @@ void uc_vm_free(uc_vm_t *vm) free(vm->restypes.entries[i]); uc_vector_clear(&vm->restypes); + uc_vector_clear(&vm->exports); } static uc_chunk_t * @@ -211,7 +217,7 @@ uc_vm_frame_program(uc_callframe_t *frame) static uc_source_t * uc_vm_frame_source(uc_callframe_t *frame) { - return frame->closure ? frame->closure->function->program->source : NULL; + return frame->closure ? uc_program_function_source(frame->closure->function) : NULL; } static uc_callframe_t * @@ -345,7 +351,10 @@ uc_vm_frame_dump(uc_vm_t *vm, uc_callframe_t *frame) fprintf(stderr, " [%zu] <%p> %s ", i, (void *)ref, uc_vm_format_val(vm, v)); - if (ref->closed) { + if (!ref) { + fprintf(stderr, "{unresolved}\n"); + } + else if (ref->closed) { fprintf(stderr, "{closed} %s\n", uc_vm_format_val(vm, ref->value)); } @@ -360,20 +369,43 @@ uc_vm_frame_dump(uc_vm_t *vm, uc_callframe_t *frame) } } +static uc_value_t * +uc_vm_resolve_upval(uc_vm_t *vm, uc_value_t *value) +{ + uc_upvalref_t *ref; + +#ifdef __clang_analyzer__ + /* Clang static analyzer does not understand that ucv_type(NULL) can't + * possibly yield UC_UPVALUE. Nudge it. */ + if (value != NULL && ucv_type(value) == UC_UPVALUE) +#else + if (ucv_type(value) == UC_UPVALUE) +#endif + { + ref = (uc_upvalref_t *)value; + + if (ref->closed) + return ucv_get(ref->value); + else + return ucv_get(vm->stack.entries[ref->slot]); + } + + return value; +} + void uc_vm_stack_push(uc_vm_t *vm, uc_value_t *value) { uc_vector_grow(&vm->stack); ucv_put(vm->stack.entries[vm->stack.count]); - - vm->stack.entries[vm->stack.count] = value; + vm->stack.entries[vm->stack.count] = uc_vm_resolve_upval(vm, value); vm->stack.count++; if (vm->trace) { fprintf(stderr, " [+%zd] %s\n", vm->stack.count - 1, - uc_vm_format_val(vm, value)); + uc_vm_format_val(vm, vm->stack.entries[vm->stack.count - 1])); } } @@ -829,7 +861,7 @@ uc_vm_capture_stacktrace(uc_vm_t *vm, size_t i) if (frame->closure) { function = frame->closure->function; - source = function->program->source; + source = uc_program_function_source(function); off = (frame->ip - uc_vm_frame_chunk(frame)->entries) - 1; srcpos = uc_program_function_srcpos(function, off); @@ -1204,6 +1236,21 @@ uc_vm_insn_store_var(uc_vm_t *vm, uc_vm_insn_t insn) uc_vm_stack_push(vm, v); } +static bool +assert_mutable_value(uc_vm_t *vm, uc_value_t *val) +{ + if (ucv_is_constant(val)) { + uc_vm_stack_push(vm, NULL); + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "%s value is immutable", + ucv_typename(val)); + + return false; + } + + return true; +} + static void uc_vm_insn_store_val(uc_vm_t *vm, uc_vm_insn_t insn) { @@ -1214,7 +1261,9 @@ uc_vm_insn_store_val(uc_vm_t *vm, uc_vm_insn_t insn) switch (ucv_type(o)) { case UC_OBJECT: case UC_ARRAY: - uc_vm_stack_push(vm, ucv_key_set(vm, o, k, v)); + if (assert_mutable_value(vm, o)) + uc_vm_stack_push(vm, ucv_key_set(vm, o, k, v)); + break; default: @@ -1684,8 +1733,11 @@ uc_vm_insn_update_val(uc_vm_t *vm, uc_vm_insn_t insn) switch (ucv_type(v)) { case UC_OBJECT: case UC_ARRAY: - val = ucv_key_get(vm, v, k); - uc_vm_stack_push(vm, ucv_key_set(vm, v, k, uc_vm_value_arith(vm, vm->arg.u8, val, inc))); + if (assert_mutable_value(vm, v)) { + val = ucv_key_get(vm, v, k); + uc_vm_stack_push(vm, ucv_key_set(vm, v, k, uc_vm_value_arith(vm, vm->arg.u8, val, inc))); + } + break; default: @@ -2227,7 +2279,7 @@ uc_vm_insn_mcall(uc_vm_t *vm, uc_vm_insn_t insn) size_t key_slot = vm->stack.count - (vm->arg.u32 & 0xffff) - 1; uc_value_t *ctx = vm->stack.entries[key_slot - 1]; uc_value_t *key = vm->stack.entries[key_slot]; - uc_value_t *fno = ucv_key_get(vm, ctx, key); + uc_value_t *fno = uc_vm_resolve_upval(vm, ucv_key_get(vm, ctx, key)); if (!ucv_is_callable(fno) && insn == I_QMCALL) return uc_vm_skip_call(vm, true); @@ -2280,8 +2332,11 @@ uc_vm_insn_delete(uc_vm_t *vm, uc_vm_insn_t insn) switch (ucv_type(v)) { case UC_OBJECT: - rv = ucv_key_delete(vm, v, k); - uc_vm_stack_push(vm, ucv_boolean_new(rv)); + if (assert_mutable_value(vm, v)) { + rv = ucv_key_delete(vm, v, k); + uc_vm_stack_push(vm, ucv_boolean_new(rv)); + } + break; default: @@ -2296,6 +2351,74 @@ uc_vm_insn_delete(uc_vm_t *vm, uc_vm_insn_t insn) ucv_put(v); } +static void +uc_vm_insn_import(uc_vm_t *vm, uc_vm_insn_t insn) +{ + uc_callframe_t *frame = uc_vm_current_frame(vm); + uint16_t from = vm->arg.u32 & 0xffff; + uint16_t to = vm->arg.u32 >> 16; + uc_value_t *name, *modobj; + uint32_t cidx; + + /* is a wildcard import * from ... */ + if (to == 0xffff) { + to = from; + modobj = ucv_object_new(vm); + + /* instruction is followed by u16 containing the offset of the + * first module export and `from` times u32 values containing + * the constant indexes of the names */ + for (from = frame->ip[0] * 0x100 + frame->ip[1], frame->ip += 2; + from < to && from < vm->exports.count; + from++) { + + cidx = ( + frame->ip[0] * 0x1000000UL + + frame->ip[1] * 0x10000UL + + frame->ip[2] * 0x100UL + + frame->ip[3] + ); + + frame->ip += 4; + + name = uc_program_get_constant(uc_vm_current_program(vm), cidx); + + if (ucv_type(name) == UC_STRING && vm->exports.entries[from]) + ucv_object_add(modobj, ucv_string_get(name), + ucv_get(&vm->exports.entries[from]->header)); + + ucv_put(name); + } + + ucv_set_constant(modobj, true); + + uc_vm_stack_push(vm, modobj); + } + + /* module export available, patch into upvalue */ + else if (from < vm->exports.count && vm->exports.entries[from]) { + frame->closure->upvals[to] = vm->exports.entries[from]; + ucv_get(&vm->exports.entries[from]->header); + } + + /* module export missing, e.g. due to premature return in module, + * patch up dummy upvalue ref with `null` value */ + else { + frame->closure->upvals[to] = (uc_upvalref_t *)ucv_upvalref_new(0); + frame->closure->upvals[to]->closed = true; + } +} + +static void +uc_vm_insn_export(uc_vm_t *vm, uc_vm_insn_t insn) +{ + uc_callframe_t *frame = uc_vm_current_frame(vm); + uc_upvalref_t *ref = uc_vm_capture_upval(vm, frame->stackframe + vm->arg.u32); + + uc_vector_push(&vm->exports, ref); + ucv_get(&ref->header); +} + static uc_value_t * uc_vm_callframe_pop(uc_vm_t *vm) { @@ -2583,6 +2706,14 @@ uc_vm_execute_chunk(uc_vm_t *vm) uc_vm_insn_delete(vm, insn); break; + case I_IMPORT: + uc_vm_insn_import(vm, insn); + break; + + case I_EXPORT: + uc_vm_insn_export(vm, insn); + break; + default: uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "unknown opcode %d", insn); break; |