From b94c01f29408600721c7e3302392e9015fc7bafd Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Mon, 30 Nov 2020 18:13:47 +0100 Subject: syntax: recognize single-char escapes in regex literals again Ensure that the single char escapes `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t` and `\v` keep working. Since they're not part of the POSIX extended regular expression spec, they're not handled by the RE engine so we need to substitute them by their actual byte value while parsing the literal. Fixes: ac5cb87 ("syntax: fix string and regex literal parsing quirks") Signed-off-by: Jo-Philipp Wich --- lexer.c | 26 ++++++++++++++------------ tests/00_syntax/21_regex_literals | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/lexer.c b/lexer.c index 21a3b3a..5e2e7e8 100644 --- a/lexer.c +++ b/lexer.c @@ -394,23 +394,13 @@ parse_string(struct uc_state *s) /* continuation of escape sequence */ if (s->lex.is_escape) { if (s->lex.esclen == 0) { - /* regex mode => do not interprete escapes */ - if (q == '/') { - s->lex.is_escape = false; - lookbehind_append(s, "\\", 1); - lookbehind_append(s, ptr, 1); - buf_consume(s, (ptr + 1) - s->lex.bufstart); - - continue; - } - /* non-unicode escape following a lead surrogate, emit replacement... */ if (s->lex.lead_surrogate && *ptr != 'u') { append_utf8(s, 0xFFFD); s->lex.lead_surrogate = 0; } - switch (*ptr) { + switch ((q == '/') ? 0 : *ptr) { case 'u': case 'x': s->lex.esc[s->lex.esclen++] = *ptr; @@ -444,8 +434,20 @@ parse_string(struct uc_state *s) default: s->lex.is_escape = false; c = strchr("a\ab\be\ef\fn\nr\rt\tv\v", *ptr); - lookbehind_append(s, (c && *c >= 'a') ? c + 1 : ptr, 1); + + if (c && *c >= 'a') { + lookbehind_append(s, c + 1, 1); + } + else { + /* regex mode => retain backslash */ + if (q == '/') + lookbehind_append(s, "\\", 1); + + lookbehind_append(s, ptr, 1); + } + buf_consume(s, (ptr + 1) - s->lex.bufstart); + break; } } diff --git a/tests/00_syntax/21_regex_literals b/tests/00_syntax/21_regex_literals index bbb78fb..4aef33f 100644 --- a/tests/00_syntax/21_regex_literals +++ b/tests/00_syntax/21_regex_literals @@ -4,7 +4,7 @@ within regular expression literals is subject of the underlying regular expression engine. -- Expect stdout -- -[ "/Hello world/", "/test/gis", "/test/g", "/test1 \\\/ test2/", "/\\x31\\n\\.\\a\\b\\c\\u2600\\\\/" ] +[ "/Hello world/", "/test/gis", "/test/g", "/test1 \\\/ test2/", "/\\x31\n\\.\u0007\b\\c\\u2600\\\\/" ] -- End -- -- Testcase -- -- cgit v1.2.3