diff options
author | Jo-Philipp Wich <jo@mein.io> | 2020-11-30 18:13:47 +0100 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2020-11-30 18:17:48 +0100 |
commit | b94c01f29408600721c7e3302392e9015fc7bafd (patch) | |
tree | 828d85dd734968f70b23f5aea9c6ab0ffae593c0 | |
parent | ac5cb8736f89f8da0c1bbe407d7d4ae9df530a5c (diff) |
syntax: recognize single-char escapes in regex literals again
Ensure that the single char escapes `\a`, `\b`, `\e`, `\f`, `\n`,
`\r`, `\t` and `\v` keep working. Since they're not part of the POSIX
extended regular expression spec, they're not handled by the RE engine,
so we need to substitute them with their actual byte values while parsing
the literal.
Fixes: ac5cb87 ("syntax: fix string and regex literal parsing quirks")
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- | lexer.c | 26 | ||||
-rw-r--r-- | tests/00_syntax/21_regex_literals | 2 |
2 files changed, 15 insertions, 13 deletions
@@ -394,23 +394,13 @@ parse_string(struct uc_state *s) /* continuation of escape sequence */ if (s->lex.is_escape) { if (s->lex.esclen == 0) { - /* regex mode => do not interprete escapes */ - if (q == '/') { - s->lex.is_escape = false; - lookbehind_append(s, "\\", 1); - lookbehind_append(s, ptr, 1); - buf_consume(s, (ptr + 1) - s->lex.bufstart); - - continue; - } - /* non-unicode escape following a lead surrogate, emit replacement... */ if (s->lex.lead_surrogate && *ptr != 'u') { append_utf8(s, 0xFFFD); s->lex.lead_surrogate = 0; } - switch (*ptr) { + switch ((q == '/') ? 0 : *ptr) { case 'u': case 'x': s->lex.esc[s->lex.esclen++] = *ptr; @@ -444,8 +434,20 @@ parse_string(struct uc_state *s) default: s->lex.is_escape = false; c = strchr("a\ab\be\ef\fn\nr\rt\tv\v", *ptr); - lookbehind_append(s, (c && *c >= 'a') ? c + 1 : ptr, 1); + + if (c && *c >= 'a') { + lookbehind_append(s, c + 1, 1); + } + else { + /* regex mode => retain backslash */ + if (q == '/') + lookbehind_append(s, "\\", 1); + + lookbehind_append(s, ptr, 1); + } + buf_consume(s, (ptr + 1) - s->lex.bufstart); + break; } } diff --git a/tests/00_syntax/21_regex_literals b/tests/00_syntax/21_regex_literals index bbb78fb..4aef33f 100644 --- a/tests/00_syntax/21_regex_literals +++ b/tests/00_syntax/21_regex_literals @@ -4,7 +4,7 @@ within regular expression literals is subject of the underlying regular expression engine. -- Expect stdout -- -[ "/Hello world/", "/test/gis", "/test/g", "/test1 \\\/ test2/", "/\\x31\\n\\.\\a\\b\\c\\u2600\\\\/" ] +[ "/Hello world/", "/test/gis", "/test/g", "/test1 \\\/ test2/", "/\\x31\n\\.\u0007\b\\c\\u2600\\\\/" ] -- End -- -- Testcase -- |