diff options
author | Jo-Philipp Wich <jo@mein.io> | 2020-11-30 17:38:50 +0100 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2020-11-30 17:38:50 +0100 |
commit | ac5cb8736f89f8da0c1bbe407d7d4ae9df530a5c (patch) | |
tree | 8e1f0a1e5e1bcde78f275839fb109e3eaf9bdb7b | |
parent | f7b079ce3a41a0f92adb623b0de10419fc9f5df9 (diff) |
syntax: fix string and regex literal parsing quirks
- Do not interpret escape sequences in regexp literals
- Do not improperly substitute control escape sequences such as
`\n` or `\a` after a backslash
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- | lexer.c | 24 | ||||
-rw-r--r-- | tests/00_syntax/09_string_literals | 18 | ||||
-rw-r--r-- | tests/00_syntax/21_regex_literals | 89 |
3 files changed, 120 insertions, 11 deletions
@@ -394,6 +394,16 @@ parse_string(struct uc_state *s) /* continuation of escape sequence */ if (s->lex.is_escape) { if (s->lex.esclen == 0) { + /* regex mode => do not interprete escapes */ + if (q == '/') { + s->lex.is_escape = false; + lookbehind_append(s, "\\", 1); + lookbehind_append(s, ptr, 1); + buf_consume(s, (ptr + 1) - s->lex.bufstart); + + continue; + } + /* non-unicode escape following a lead surrogate, emit replacement... */ if (s->lex.lead_surrogate && *ptr != 'u') { append_utf8(s, 0xFFFD); @@ -416,16 +426,8 @@ parse_string(struct uc_state *s) case '7': case '8': case '9': - /* regex mode => backref, retain literally */ - if (q == '/') { - s->lex.is_escape = false; - lookbehind_append(s, "\\", 1); - lookbehind_append(s, ptr, 1); - buf_consume(s, (ptr + 1) - s->lex.bufstart); - } - - /* string mode => likely octal */ - else if (*ptr < '8') { + /* likely octal */ + if (*ptr < '8') { s->lex.esc[s->lex.esclen++] = 'o'; s->lex.esc[s->lex.esclen++] = *ptr; } @@ -442,7 +444,7 @@ parse_string(struct uc_state *s) default: s->lex.is_escape = false; c = strchr("a\ab\be\ef\fn\nr\rt\tv\v", *ptr); - lookbehind_append(s, c ? c + 1 : ptr, 1); + lookbehind_append(s, (c && *c >= 'a') ? c + 1 : ptr, 1); buf_consume(s, (ptr + 1) - s->lex.bufstart); break; } diff --git a/tests/00_syntax/09_string_literals b/tests/00_syntax/09_string_literals index cd84d3e..381076e 100644 --- a/tests/00_syntax/09_string_literals +++ b/tests/00_syntax/09_string_literals @@ -23,3 +23,21 @@ Octal escape: ABC xyz {{ "Octal escape: \101\102\103 \170\171\172" }} {{ { "Single char escape": "\a\b\e\f\r\t\v\\\n" } }} -- End -- + + +Testing various parsing corner cases. 
+ +-- Expect stdout -- +[ "\t", "\n", "y" ] +-- End -- + +-- Testcase -- +{% + print([ + "\ ", // properly handle escaped tab + "\ +", // properly handle escaped newline + "\y" // substitute unrecognized escape with escaped char + ], "\n"); +%} +-- End -- diff --git a/tests/00_syntax/21_regex_literals b/tests/00_syntax/21_regex_literals new file mode 100644 index 0000000..bbb78fb --- /dev/null +++ b/tests/00_syntax/21_regex_literals @@ -0,0 +1,89 @@ +Regex literals are enclosed in forward slashes and may contain zero +or more trailing flag characters. Interpretation of escape sequences +within regular expression literals is subject of the underlying +regular expression engine. + +-- Expect stdout -- +[ "/Hello world/", "/test/gis", "/test/g", "/test1 \\\/ test2/", "/\\x31\\n\\.\\a\\b\\c\\u2600\\\\/" ] +-- End -- + +-- Testcase -- +{% + print([ + /Hello world/, // A very simple expression + /test/gsi, // Regular expression flags + /test/gg, // Repeated flags + /test1 \/ test2/, // Escaped forward slash + /\x31\n\.\a\b\c\u2600\\/ // Ensure that escape sequences are passed as-is + ], "\n"); +%} +-- End -- + + +Testing regular expression type. + +-- Expect stdout -- +object +-- End -- + +-- Testcase -- +{{ type(/foo/) }} +-- End -- + + +Testing invalid flag characters. + +-- Expect stderr -- +Syntax error: Unexpected token +Expecting ',' or ';' +In line 2, byte 8: + + ` /test/x` + ^-- Near here + + +-- End -- + +-- Testcase -- +{% + /test/x +%} +-- End -- + + +Testing unclosed regular expression. + +-- Expect stderr -- +Syntax error: Unterminated string +In line 2, byte 2: + + ` /foo \/` + ^-- Near here + + +-- End -- + +-- Testcase -- +{% + /foo \/ +%} +-- End -- + + +Testing regex compilation errors. + +-- Expect stderr -- +Syntax error: Unmatched \{ +In line 2, byte 3: + + ` /foo {/` + ^-- Near here + + +-- End -- + +-- Testcase -- +{% + /foo {/ +%} +-- End -- |