diff options
author | Jo-Philipp Wich <jo@mein.io> | 2020-11-30 18:13:47 +0100 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2020-11-30 18:17:48 +0100 |
commit | b94c01f29408600721c7e3302392e9015fc7bafd (patch) | |
tree | 828d85dd734968f70b23f5aea9c6ab0ffae593c0 /lexer.c | |
parent | ac5cb8736f89f8da0c1bbe407d7d4ae9df530a5c (diff) |
syntax: recognize single-char escapes in regex literals again
Ensure that the single-character escapes `\a`, `\b`, `\e`, `\f`, `\n`,
`\r`, `\t` and `\v` keep working. Since they are not part of the POSIX
extended regular expression spec, the RE engine does not handle them,
so we need to substitute them with their actual byte values while parsing
the literal.
Fixes: ac5cb87 ("syntax: fix string and regex literal parsing quirks")
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
Diffstat (limited to 'lexer.c')
-rw-r--r-- | lexer.c | 26 |
1 file changed, 14 insertions, 12 deletions
@@ -394,23 +394,13 @@ parse_string(struct uc_state *s) /* continuation of escape sequence */ if (s->lex.is_escape) { if (s->lex.esclen == 0) { - /* regex mode => do not interprete escapes */ - if (q == '/') { - s->lex.is_escape = false; - lookbehind_append(s, "\\", 1); - lookbehind_append(s, ptr, 1); - buf_consume(s, (ptr + 1) - s->lex.bufstart); - - continue; - } - /* non-unicode escape following a lead surrogate, emit replacement... */ if (s->lex.lead_surrogate && *ptr != 'u') { append_utf8(s, 0xFFFD); s->lex.lead_surrogate = 0; } - switch (*ptr) { + switch ((q == '/') ? 0 : *ptr) { case 'u': case 'x': s->lex.esc[s->lex.esclen++] = *ptr; @@ -444,8 +434,20 @@ parse_string(struct uc_state *s) default: s->lex.is_escape = false; c = strchr("a\ab\be\ef\fn\nr\rt\tv\v", *ptr); - lookbehind_append(s, (c && *c >= 'a') ? c + 1 : ptr, 1); + + if (c && *c >= 'a') { + lookbehind_append(s, c + 1, 1); + } + else { + /* regex mode => retain backslash */ + if (q == '/') + lookbehind_append(s, "\\", 1); + + lookbehind_append(s, ptr, 1); + } + buf_consume(s, (ptr + 1) - s->lex.bufstart); + break; } } |