diff options
author | Jo-Philipp Wich <jo@mein.io> | 2021-05-14 18:45:10 +0200 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2021-05-14 18:46:31 +0200 |
commit | 1ddf5b68fa60736c1dd9ffda5eba09bbd90309e1 (patch) | |
tree | 336e3162fbef90b4f838d2effc616b4a1bb20037 | |
parent | 9951a003e53d39a8d00b13b694a3a106e161c552 (diff) |
lexer: skip interpreter line in any source buffer
Skip interpreter lines in any source buffer and handle the skipping in the
lexer itself. Skipping the line outside of the lexer caused wrongly shifted
token offsets to be reported to the compiler, resulting in wrong error
locations and source contexts.
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- | lexer.c | 36 | ||||
-rw-r--r-- | main.c | 27 | ||||
-rw-r--r-- | tests/custom/03_bugs/25_lexer_shifted_offsets | 21 |
3 files changed, 60 insertions, 24 deletions
@@ -1141,6 +1141,38 @@ lex_step(uc_lexer *lex, FILE *fp) return NULL; } +static void +uc_lexer_skip_shebang(uc_lexer *lex) +{ + uc_source *source = lex->source; + FILE *fp = source->fp; + int c1, c2; + + c1 = fgetc(fp); + c2 = fgetc(fp); + + if (c1 == '#' && c2 == '!') { + next_lineinfo(lex); + + source->off += 2; + + while ((c1 = fgetc(fp)) != EOF) { + source->off++; + + if (c1 == '\n') { + update_lineinfo(lex, source->off); + next_lineinfo(lex); + + break; + } + } + } + else { + ungetc(c2, fp); + ungetc(c1, fp); + } +} + void uc_lexer_init(uc_lexer *lex, uc_parse_config *config, uc_source *source) { @@ -1171,6 +1203,10 @@ uc_lexer_init(uc_lexer *lex, uc_parse_config *config, uc_source *source) lex->lead_surrogate = 0; lex->lastoff = 0; + + /* Skip any potential interpreter line */ + if (lex->source->off == 0) + uc_lexer_skip_shebang(lex); } void @@ -102,35 +102,17 @@ register_variable(uc_value_t *scope, const char *key, uc_value_t *val) static int parse(uc_parse_config *config, uc_source *src, - bool skip_shebang, uc_value_t *env, uc_value_t *modules, + uc_value_t *env, uc_value_t *modules, int argc, char **argv) { uc_value_t *globals = NULL; uc_function_t *entry; uc_vm vm = { 0 }; - int c, c2, rc = 0; + int rc = 0; char *err; uc_vm_init(&vm, config); - if (skip_shebang) { - c = fgetc(src->fp); - c2 = fgetc(src->fp); - - if (c == '#' && c2 == '!') { - while ((c = fgetc(src->fp)) != EOF) { - src->off++; - - if (c == '\n') - break; - } - } - else { - ungetc(c2, src->fp); - ungetc(c, src->fp); - } - } - entry = uc_compile(config, src, &err); if (!entry) { @@ -243,7 +225,6 @@ main(int argc, char **argv) uc_value_t *env = NULL, *modules = NULL, *o, *p; uc_source *source = NULL, *envfile = NULL; char *stdin = NULL, *c; - bool shebang = false; int opt, rv = 0; uc_parse_config config = { @@ -384,8 +365,6 @@ main(int argc, char **argv) rv = 1; goto out; } - - shebang = true; } if (!source) { @@ -394,7 +373,7 @@ main(int argc, char **argv) goto out; } - rv = parse(&config, 
source, shebang, env, modules, argc, argv); + rv = parse(&config, source, env, modules, argc, argv); out: ucv_put(modules); diff --git a/tests/custom/03_bugs/25_lexer_shifted_offsets b/tests/custom/03_bugs/25_lexer_shifted_offsets new file mode 100644 index 0000000..db10121 --- /dev/null +++ b/tests/custom/03_bugs/25_lexer_shifted_offsets @@ -0,0 +1,21 @@ +When lexing a source buffer with a non-zero offset, e.g. due to a +skipped interpreter line, lexical tokens reported a wrong offset +to the compiler, causing error locations and source context lines +to be incorrectly shifted. + +-- Testcase -- +#!/usr/bin/env ucode +{% + die("Error"); +%} +-- End -- + +-- Expect stderr -- +Error +In line 3, byte 12: + + ` die("Error");` + Near here -----^ + + +-- End -- |