summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Jo-Philipp Wich <jo@mein.io> 2020-11-30 18:13:47 +0100
committer Jo-Philipp Wich <jo@mein.io> 2020-11-30 18:17:48 +0100
commit b94c01f29408600721c7e3302392e9015fc7bafd (patch)
tree 828d85dd734968f70b23f5aea9c6ab0ffae593c0
parent ac5cb8736f89f8da0c1bbe407d7d4ae9df530a5c (diff)
syntax: recognize single-char escapes in regex literals again
Ensure that the single-char escapes `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t` and `\v` keep working. Since they are not part of the POSIX extended regular expression spec, they are not handled by the RE engine, so we need to substitute them with their actual byte values while parsing the literal.

Fixes: ac5cb87 ("syntax: fix string and regex literal parsing quirks")
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- lexer.c | 26
-rw-r--r-- tests/00_syntax/21_regex_literals | 2
2 files changed, 15 insertions, 13 deletions
diff --git a/lexer.c b/lexer.c
index 21a3b3a..5e2e7e8 100644
--- a/lexer.c
+++ b/lexer.c
@@ -394,23 +394,13 @@ parse_string(struct uc_state *s)
/* continuation of escape sequence */
if (s->lex.is_escape) {
if (s->lex.esclen == 0) {
- /* regex mode => do not interprete escapes */
- if (q == '/') {
- s->lex.is_escape = false;
- lookbehind_append(s, "\\", 1);
- lookbehind_append(s, ptr, 1);
- buf_consume(s, (ptr + 1) - s->lex.bufstart);
-
- continue;
- }
-
/* non-unicode escape following a lead surrogate, emit replacement... */
if (s->lex.lead_surrogate && *ptr != 'u') {
append_utf8(s, 0xFFFD);
s->lex.lead_surrogate = 0;
}
- switch (*ptr) {
+ switch ((q == '/') ? 0 : *ptr) {
case 'u':
case 'x':
s->lex.esc[s->lex.esclen++] = *ptr;
@@ -444,8 +434,20 @@ parse_string(struct uc_state *s)
default:
s->lex.is_escape = false;
c = strchr("a\ab\be\ef\fn\nr\rt\tv\v", *ptr);
- lookbehind_append(s, (c && *c >= 'a') ? c + 1 : ptr, 1);
+
+ if (c && *c >= 'a') {
+ lookbehind_append(s, c + 1, 1);
+ }
+ else {
+ /* regex mode => retain backslash */
+ if (q == '/')
+ lookbehind_append(s, "\\", 1);
+
+ lookbehind_append(s, ptr, 1);
+ }
+
buf_consume(s, (ptr + 1) - s->lex.bufstart);
+
break;
}
}
diff --git a/tests/00_syntax/21_regex_literals b/tests/00_syntax/21_regex_literals
index bbb78fb..4aef33f 100644
--- a/tests/00_syntax/21_regex_literals
+++ b/tests/00_syntax/21_regex_literals
@@ -4,7 +4,7 @@ within regular expression literals is subject of the underlying
regular expression engine.
-- Expect stdout --
-[ "/Hello world/", "/test/gis", "/test/g", "/test1 \\\/ test2/", "/\\x31\\n\\.\\a\\b\\c\\u2600\\\\/" ]
+[ "/Hello world/", "/test/gis", "/test/g", "/test1 \\\/ test2/", "/\\x31\n\\.\u0007\b\\c\\u2600\\\\/" ]
-- End --
-- Testcase --