diff options
author | Jo-Philipp Wich <jo@mein.io> | 2024-09-23 23:20:12 +0200 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2024-09-23 23:29:25 +0200 |
commit | 328a50ff82c9bf089dcd381d404dece683ef54d2 (patch) | |
tree | b4ba2de078cd0d748ada3bf1c368a4c7c3bf7334 | |
parent | fa22732f3463a443d5b42d933e245680fc8ed20f (diff) |
lexer: improve token position reporting
- Report end position for emitted tokens. This is required to reliably
determine the token length, e.g. for downstream code intelligence
use cases.
- Fix start offset of continued template literal string tokens.
Previously the start offset of a literal string following a `${...}`
placeholder expression was shifted by one byte.
- Report proper start offset of `TK_LEXP` tokens.
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- | include/ucode/lexer.h | 1 | ||||
-rw-r--r-- | lexer.c | 18 |
2 files changed, 15 insertions, 4 deletions
diff --git a/include/ucode/lexer.h b/include/ucode/lexer.h index 1728aa3..8929731 100644 --- a/include/ucode/lexer.h +++ b/include/ucode/lexer.h @@ -138,6 +138,7 @@ typedef struct { uc_tokentype_t type; uc_value_t *uv; size_t pos; + size_t end; } uc_token_t; typedef struct { @@ -145,6 +145,8 @@ emit_op(uc_lexer_t *lex, ssize_t pos, int type, uc_value_t *uv) else lex->curr.pos = (size_t)pos; + lex->curr.end = lex->source->off; + return &lex->curr; } @@ -338,7 +340,7 @@ parse_escape(uc_lexer_t *lex, const char *regex_macros) static uc_token_t * parse_string(uc_lexer_t *lex, int kind) { - uc_token_t *err; + uc_token_t *err, *tok; unsigned type; int code, ch; size_t off; @@ -359,7 +361,10 @@ parse_string(uc_lexer_t *lex, int kind) if (type == TK_TEMPLATE && check_char(lex, '{')) { lex->state = UC_LEX_PLACEHOLDER_START; - return emit_buffer(lex, off, type, NULL); + tok = emit_buffer(lex, off, type, NULL); + tok->end -= 2; + + return tok; } uc_vector_push(&lex->buffer, '$'); @@ -987,6 +992,8 @@ lex_step(uc_lexer_t *lex) if (!tok) continue; + tok->end -= 2; + return tok; @@ -1022,7 +1029,7 @@ lex_step(uc_lexer_t *lex) lex->state = UC_LEX_IDENTIFY_TOKEN; lex->block = EXPRESSION; - return emit_op(lex, lex->source->off, TK_LEXP, NULL); + return emit_op(lex, lex->source->off - 2, TK_LEXP, NULL); case UC_LEX_IDENTIFY_TOKEN: @@ -1092,7 +1099,10 @@ lex_step(uc_lexer_t *lex) case UC_LEX_PLACEHOLDER_END: lex->state = UC_LEX_IDENTIFY_TOKEN; - return parse_string(lex, '`'); + tok = parse_string(lex, '`'); + tok->pos++; + + return tok; case UC_LEX_EOF: |