summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJo-Philipp Wich <jo@mein.io>2021-05-14 18:45:10 +0200
committerJo-Philipp Wich <jo@mein.io>2021-05-14 18:46:31 +0200
commit1ddf5b68fa60736c1dd9ffda5eba09bbd90309e1 (patch)
tree336e3162fbef90b4f838d2effc616b4a1bb20037
parent9951a003e53d39a8d00b13b694a3a106e161c552 (diff)
lexer: skip interpreter line in any source buffer
Skip interpreter lines in any source buffer and handle the skipping in the lexer itself. This avoids reporting wrongly shifted token offsets to the compiler, which resulted in wrong error locations and source contexts.

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- lexer.c 36
-rw-r--r-- main.c 27
-rw-r--r-- tests/custom/03_bugs/25_lexer_shifted_offsets 21
3 files changed, 60 insertions, 24 deletions
diff --git a/lexer.c b/lexer.c
index d9d657e..25a5cf4 100644
--- a/lexer.c
+++ b/lexer.c
@@ -1141,6 +1141,38 @@ lex_step(uc_lexer *lex, FILE *fp)
return NULL;
}
+static void
+uc_lexer_skip_shebang(uc_lexer *lex)
+{
+ uc_source *source = lex->source;
+ FILE *fp = source->fp;
+ int c1, c2;
+ /* Skip a leading "#!" interpreter line: read the first two bytes to detect it */
+ c1 = fgetc(fp);
+ c2 = fgetc(fp);
+ /* Interpreter line present: discard input up to and including '\n', or up to EOF */
+ if (c1 == '#' && c2 == '!') {
+ next_lineinfo(lex);
+ /* Account for the "#!" bytes already consumed above */
+ source->off += 2;
+ /* Keep source->off in step with every byte discarded from the stream */
+ while ((c1 = fgetc(fp)) != EOF) {
+ source->off++;
+
+ if (c1 == '\n') {
+ update_lineinfo(lex, source->off);
+ next_lineinfo(lex);
+
+ break;
+ }
+ }
+ }
+ else { /* no interpreter line: push back c2 then c1 so c1 is read first again; ungetc(EOF) is a no-op */
+ ungetc(c2, fp); /* NOTE(review): ISO C guarantees only one byte of pushback, so the second ungetc() below may fail on some libcs */
+ ungetc(c1, fp);
+ }
+}
+
void
uc_lexer_init(uc_lexer *lex, uc_parse_config *config, uc_source *source)
{
@@ -1171,6 +1203,10 @@ uc_lexer_init(uc_lexer *lex, uc_parse_config *config, uc_source *source)
lex->lead_surrogate = 0;
lex->lastoff = 0;
+
+ /* Skip any potential interpreter line */
+ if (lex->source->off == 0)
+ uc_lexer_skip_shebang(lex);
}
void
diff --git a/main.c b/main.c
index baea9fb..3fd1f25 100644
--- a/main.c
+++ b/main.c
@@ -102,35 +102,17 @@ register_variable(uc_value_t *scope, const char *key, uc_value_t *val)
static int
parse(uc_parse_config *config, uc_source *src,
- bool skip_shebang, uc_value_t *env, uc_value_t *modules,
+ uc_value_t *env, uc_value_t *modules,
int argc, char **argv)
{
uc_value_t *globals = NULL;
uc_function_t *entry;
uc_vm vm = { 0 };
- int c, c2, rc = 0;
+ int rc = 0;
char *err;
uc_vm_init(&vm, config);
- if (skip_shebang) {
- c = fgetc(src->fp);
- c2 = fgetc(src->fp);
-
- if (c == '#' && c2 == '!') {
- while ((c = fgetc(src->fp)) != EOF) {
- src->off++;
-
- if (c == '\n')
- break;
- }
- }
- else {
- ungetc(c2, src->fp);
- ungetc(c, src->fp);
- }
- }
-
entry = uc_compile(config, src, &err);
if (!entry) {
@@ -243,7 +225,6 @@ main(int argc, char **argv)
uc_value_t *env = NULL, *modules = NULL, *o, *p;
uc_source *source = NULL, *envfile = NULL;
char *stdin = NULL, *c;
- bool shebang = false;
int opt, rv = 0;
uc_parse_config config = {
@@ -384,8 +365,6 @@ main(int argc, char **argv)
rv = 1;
goto out;
}
-
- shebang = true;
}
if (!source) {
@@ -394,7 +373,7 @@ main(int argc, char **argv)
goto out;
}
- rv = parse(&config, source, shebang, env, modules, argc, argv);
+ rv = parse(&config, source, env, modules, argc, argv);
out:
ucv_put(modules);
diff --git a/tests/custom/03_bugs/25_lexer_shifted_offsets b/tests/custom/03_bugs/25_lexer_shifted_offsets
new file mode 100644
index 0000000..db10121
--- /dev/null
+++ b/tests/custom/03_bugs/25_lexer_shifted_offsets
@@ -0,0 +1,21 @@
+When lexing a source buffer with a non-zero offset, e.g. due to a
+skipped interpreter line, lexical tokens reported a wrong offset
+to the compiler, causing error locations and source context lines
+to be incorrectly shifted.
+
+-- Testcase --
+#!/usr/bin/env ucode
+{%
+ die("Error");
+%}
+-- End --
+
+-- Expect stderr --
+Error
+In line 3, byte 12:
+
+ ` die("Error");`
+ Near here -----^
+
+
+-- End --