lexer: improve token position reporting

- Report end position for emitted tokens. This is required to reliably determine the token length, e.g. for downstream code intelligence use cases.
- Fix start offset of continued template literal string tokens. Previously, the start offset of a literal string following a `${...}` placeholder expression was shifted by one byte.
- Report proper start offset of `TK_LEXP` tokens.

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
author: Jo-Philipp Wich <jo@mein.io> 2024-09-23 23:20:12 +0200
committer: Jo-Philipp Wich <jo@mein.io> 2024-09-23 23:29:25 +0200
commit: 328a50ff82c9bf089dcd381d404dece683ef54d2 (patch)
tree: b4ba2de078cd0d748ada3bf1c368a4c7c3bf7334 /lexer.c
parent: fa22732f3463a443d5b42d933e245680fc8ed20f (diff)
1 files changed, 14 insertions, 4 deletions
diff --git a/lexer.c b/lexer.c
index 53f00f5..3e640c6 100644
--- a/lexer.c
+++ b/lexer.c
@@ -145,6 +145,8 @@ emit_op(uc_lexer_t *lex, ssize_t pos, int type, uc_value_t *uv)
 	else
 		lex->curr.pos = (size_t)pos;
 
+	lex->curr.end = lex->source->off;
+
 	return &lex->curr;
 }
 
@@ -338,7 +340,7 @@ parse_escape(uc_lexer_t *lex, const char *regex_macros)
 static uc_token_t *
 parse_string(uc_lexer_t *lex, int kind)
 {
-	uc_token_t *err;
+	uc_token_t *err, *tok;
 	unsigned type;
 	int code, ch;
 	size_t off;
@@ -359,7 +361,10 @@ parse_string(uc_lexer_t *lex, int kind)
 			if (type == TK_TEMPLATE && check_char(lex, '{')) {
 				lex->state = UC_LEX_PLACEHOLDER_START;
 
-				return emit_buffer(lex, off, type, NULL);
+				tok = emit_buffer(lex, off, type, NULL);
+				tok->end -= 2;
+
+				return tok;
 			}
 
 			uc_vector_push(&lex->buffer, '$');
@@ -987,6 +992,8 @@ lex_step(uc_lexer_t *lex)
 			if (!tok)
 				continue;
 
+			tok->end -= 2;
+
 			return tok;
 
 
@@ -1022,7 +1029,7 @@ lex_step(uc_lexer_t *lex)
 			lex->state = UC_LEX_IDENTIFY_TOKEN;
 			lex->block = EXPRESSION;
 
-			return emit_op(lex, lex->source->off, TK_LEXP, NULL);
+			return emit_op(lex, lex->source->off - 2, TK_LEXP, NULL);
 
 
 		case UC_LEX_IDENTIFY_TOKEN:
@@ -1092,7 +1099,10 @@ lex_step(uc_lexer_t *lex)
 		case UC_LEX_PLACEHOLDER_END:
 			lex->state = UC_LEX_IDENTIFY_TOKEN;
 
-			return parse_string(lex, '`');
+			tok = parse_string(lex, '`');
+			tok->pos++;
+
+			return tok;
 
 
 		case UC_LEX_EOF:
author	Jo-Philipp Wich <jo@mein.io>	2024-09-23 23:20:12 +0200
committer	Jo-Philipp Wich <jo@mein.io>	2024-09-23 23:29:25 +0200
commit	328a50ff82c9bf089dcd381d404dece683ef54d2 (patch)
tree	b4ba2de078cd0d748ada3bf1c368a4c7c3bf7334 /lexer.c
parent	fa22732f3463a443d5b42d933e245680fc8ed20f (diff)