From 98f06f4e475bc1ec4969ae9e770426b045743f14 Mon Sep 17 00:00:00 2001
From: Jo-Philipp Wich
Date: Thu, 11 Mar 2021 18:48:59 +0100
Subject: lexer: properly handle string escape sequences at buffer boundary

While parsing string literals, actually consume the backslash introducing
an escape sequence to prevent it from ending up in the produced string if
the scanner is at the end of the buffer and the remaining buffer contents
are flushed after the consumer loop.

Signed-off-by: Jo-Philipp Wich
---
 lexer.c                                   |  2 +-
 tests/03_bugs/06_lexer_escape_at_boundary | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 tests/03_bugs/06_lexer_escape_at_boundary

diff --git a/lexer.c b/lexer.c
index dfb7d98..0935494 100644
--- a/lexer.c
+++ b/lexer.c
@@ -614,7 +614,7 @@ parse_string(uc_lexer *lex, bool no_regexp)
 		else if (*ptr == '\\') {
 			lex->is_escape = true;
 			lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart);
-			buf_consume(lex, ptr - lex->bufstart);
+			buf_consume(lex, (ptr - lex->bufstart) + 1);
 		}
 	}
 
diff --git a/tests/03_bugs/06_lexer_escape_at_boundary b/tests/03_bugs/06_lexer_escape_at_boundary
new file mode 100644
index 0000000..e80b0a1
--- /dev/null
+++ b/tests/03_bugs/06_lexer_escape_at_boundary
@@ -0,0 +1,12 @@
+When the lexer processed a backslash introducing a string escape directly
+at the buffer boundary, the backslash was incorrectly retained.
+
+-- Testcase --
+{%
+	print("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkl\n");
+%}
+-- End --
+
+-- Expect stdout --
+abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkl
+-- End --
-- 
cgit v1.2.3