summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Jo-Philipp Wich <jo@mein.io> 2020-11-30 17:38:50 +0100
committer Jo-Philipp Wich <jo@mein.io> 2020-11-30 17:38:50 +0100
commit ac5cb8736f89f8da0c1bbe407d7d4ae9df530a5c (patch)
tree 8e1f0a1e5e1bcde78f275839fb109e3eaf9bdb7b
parent f7b079ce3a41a0f92adb623b0de10419fc9f5df9 (diff)
syntax: fix string and regex literal parsing quirks
- Do not interpret escape sequences in regexp literals
- Do not improperly substitute control escape sequences such as `\n` or `\a` after a backslash

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- lexer.c 24
-rw-r--r-- tests/00_syntax/09_string_literals 18
-rw-r--r-- tests/00_syntax/21_regex_literals 89
3 files changed, 120 insertions, 11 deletions
diff --git a/lexer.c b/lexer.c
index 2867e76..21a3b3a 100644
--- a/lexer.c
+++ b/lexer.c
@@ -394,6 +394,16 @@ parse_string(struct uc_state *s)
/* continuation of escape sequence */
if (s->lex.is_escape) {
if (s->lex.esclen == 0) {
+ /* regex mode => do not interpret escapes */
+ if (q == '/') {
+ s->lex.is_escape = false;
+ lookbehind_append(s, "\\", 1);
+ lookbehind_append(s, ptr, 1);
+ buf_consume(s, (ptr + 1) - s->lex.bufstart);
+
+ continue;
+ }
+
/* non-unicode escape following a lead surrogate, emit replacement... */
if (s->lex.lead_surrogate && *ptr != 'u') {
append_utf8(s, 0xFFFD);
@@ -416,16 +426,8 @@ parse_string(struct uc_state *s)
case '7':
case '8':
case '9':
- /* regex mode => backref, retain literally */
- if (q == '/') {
- s->lex.is_escape = false;
- lookbehind_append(s, "\\", 1);
- lookbehind_append(s, ptr, 1);
- buf_consume(s, (ptr + 1) - s->lex.bufstart);
- }
-
- /* string mode => likely octal */
- else if (*ptr < '8') {
+ /* likely octal */
+ if (*ptr < '8') {
s->lex.esc[s->lex.esclen++] = 'o';
s->lex.esc[s->lex.esclen++] = *ptr;
}
@@ -442,7 +444,7 @@ parse_string(struct uc_state *s)
default:
s->lex.is_escape = false;
c = strchr("a\ab\be\ef\fn\nr\rt\tv\v", *ptr);
- lookbehind_append(s, c ? c + 1 : ptr, 1);
+ lookbehind_append(s, (c && *c >= 'a') ? c + 1 : ptr, 1);
buf_consume(s, (ptr + 1) - s->lex.bufstart);
break;
}
diff --git a/tests/00_syntax/09_string_literals b/tests/00_syntax/09_string_literals
index cd84d3e..381076e 100644
--- a/tests/00_syntax/09_string_literals
+++ b/tests/00_syntax/09_string_literals
@@ -23,3 +23,21 @@ Octal escape: ABC xyz
{{ "Octal escape: \101\102\103 \170\171\172" }}
{{ { "Single char escape": "\a\b\e\f\r\t\v\\\n" } }}
-- End --
+
+
+Testing various parsing corner cases.
+
+-- Expect stdout --
+[ "\t", "\n", "y" ]
+-- End --
+
+-- Testcase --
+{%
+ print([
+ "\ ", // properly handle escaped tab
+ "\
+", // properly handle escaped newline
+ "\y" // substitute unrecognized escape with escaped char
+ ], "\n");
+%}
+-- End --
diff --git a/tests/00_syntax/21_regex_literals b/tests/00_syntax/21_regex_literals
new file mode 100644
index 0000000..bbb78fb
--- /dev/null
+++ b/tests/00_syntax/21_regex_literals
@@ -0,0 +1,89 @@
+Regex literals are enclosed in forward slashes and may contain zero
+or more trailing flag characters. Interpretation of escape sequences
+within regular expression literals is left to the underlying
+regular expression engine.
+
+-- Expect stdout --
+[ "/Hello world/", "/test/gis", "/test/g", "/test1 \\\/ test2/", "/\\x31\\n\\.\\a\\b\\c\\u2600\\\\/" ]
+-- End --
+
+-- Testcase --
+{%
+ print([
+ /Hello world/, // A very simple expression
+ /test/gsi, // Regular expression flags
+ /test/gg, // Repeated flags
+ /test1 \/ test2/, // Escaped forward slash
+ /\x31\n\.\a\b\c\u2600\\/ // Ensure that escape sequences are passed as-is
+ ], "\n");
+%}
+-- End --
+
+
+Testing regular expression type.
+
+-- Expect stdout --
+object
+-- End --
+
+-- Testcase --
+{{ type(/foo/) }}
+-- End --
+
+
+Testing invalid flag characters.
+
+-- Expect stderr --
+Syntax error: Unexpected token
+Expecting ',' or ';'
+In line 2, byte 8:
+
+ ` /test/x`
+ ^-- Near here
+
+
+-- End --
+
+-- Testcase --
+{%
+ /test/x
+%}
+-- End --
+
+
+Testing unclosed regular expression.
+
+-- Expect stderr --
+Syntax error: Unterminated string
+In line 2, byte 2:
+
+ ` /foo \/`
+ ^-- Near here
+
+
+-- End --
+
+-- Testcase --
+{%
+ /foo \/
+%}
+-- End --
+
+
+Testing regex compilation errors.
+
+-- Expect stderr --
+Syntax error: Unmatched \{
+In line 2, byte 3:
+
+ ` /foo {/`
+ ^-- Near here
+
+
+-- End --
+
+-- Testcase --
+{%
+ /foo {/
+%}
+-- End --