summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Jo-Philipp Wich <jo@mein.io> 2024-09-23 23:20:12 +0200
committer: Jo-Philipp Wich <jo@mein.io> 2024-09-23 23:29:25 +0200
commit: 328a50ff82c9bf089dcd381d404dece683ef54d2 (patch)
tree: b4ba2de078cd0d748ada3bf1c368a4c7c3bf7334
parent: fa22732f3463a443d5b42d933e245680fc8ed20f (diff)
lexer: improve token position reporting
- Report end position for emitted tokens. This is required to reliably
  determine the token length, e.g. for downstream code intelligence use cases.
- Fix start offset of continued template literal string tokens. Previously,
  the start offset of a literal string following a `${...}` placeholder
  expression was shifted by one byte.
- Report proper start offset of `TK_LEXP` tokens.

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- include/ucode/lexer.h 1
-rw-r--r-- lexer.c 18
2 files changed, 15 insertions, 4 deletions
diff --git a/include/ucode/lexer.h b/include/ucode/lexer.h
index 1728aa3..8929731 100644
--- a/include/ucode/lexer.h
+++ b/include/ucode/lexer.h
@@ -138,6 +138,7 @@ typedef struct {
uc_tokentype_t type;
uc_value_t *uv;
size_t pos;
+ size_t end;
} uc_token_t;
typedef struct {
diff --git a/lexer.c b/lexer.c
index 53f00f5..3e640c6 100644
--- a/lexer.c
+++ b/lexer.c
@@ -145,6 +145,8 @@ emit_op(uc_lexer_t *lex, ssize_t pos, int type, uc_value_t *uv)
else
lex->curr.pos = (size_t)pos;
+ lex->curr.end = lex->source->off;
+
return &lex->curr;
}
@@ -338,7 +340,7 @@ parse_escape(uc_lexer_t *lex, const char *regex_macros)
static uc_token_t *
parse_string(uc_lexer_t *lex, int kind)
{
- uc_token_t *err;
+ uc_token_t *err, *tok;
unsigned type;
int code, ch;
size_t off;
@@ -359,7 +361,10 @@ parse_string(uc_lexer_t *lex, int kind)
if (type == TK_TEMPLATE && check_char(lex, '{')) {
lex->state = UC_LEX_PLACEHOLDER_START;
- return emit_buffer(lex, off, type, NULL);
+ tok = emit_buffer(lex, off, type, NULL);
+ tok->end -= 2;
+
+ return tok;
}
uc_vector_push(&lex->buffer, '$');
@@ -987,6 +992,8 @@ lex_step(uc_lexer_t *lex)
if (!tok)
continue;
+ tok->end -= 2;
+
return tok;
@@ -1022,7 +1029,7 @@ lex_step(uc_lexer_t *lex)
lex->state = UC_LEX_IDENTIFY_TOKEN;
lex->block = EXPRESSION;
- return emit_op(lex, lex->source->off, TK_LEXP, NULL);
+ return emit_op(lex, lex->source->off - 2, TK_LEXP, NULL);
case UC_LEX_IDENTIFY_TOKEN:
@@ -1092,7 +1099,10 @@ lex_step(uc_lexer_t *lex)
case UC_LEX_PLACEHOLDER_END:
lex->state = UC_LEX_IDENTIFY_TOKEN;
- return parse_string(lex, '`');
+ tok = parse_string(lex, '`');
+ tok->pos++;
+
+ return tok;
case UC_LEX_EOF: