syntax: fix quirks when parsing octal sequences

- Eliminate dead code left after regex literal parsing changes
- Properly handle short octal sequences at end of string

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
author: Jo-Philipp Wich <jo@mein.io> 2020-11-30 19:23:01 +0100
committer: Jo-Philipp Wich <jo@mein.io> 2020-11-30 19:23:01 +0100
commit: 7caf3331ba7d8f0dd91659e4001c9d8256bf4558 (patch)
tree: ba6a32a3eb675fca44f0fc50d280037f1e70a4df
parent: b94c01f29408600721c7e3302392e9015fc7bafd (diff)
2 files changed, 14 insertions, 20 deletions
diff --git a/lexer.c b/lexer.c
index 5e2e7e8..a9c9d29 100644
--- a/lexer.c
+++ b/lexer.c
@@ -414,21 +414,8 @@ parse_string(struct uc_state *s)
 				case '5':
 				case '6':
 				case '7':
-				case '8':
-				case '9':
-					/* likely octal */
-					if (*ptr < '8') {
-						s->lex.esc[s->lex.esclen++] = 'o';
-						s->lex.esc[s->lex.esclen++] = *ptr;
-					}
-
-					/* non-octal char, add verbatim */
-					else {
-						s->lex.is_escape = false;
-						lookbehind_append(s, ptr, 1);
-						buf_consume(s, (ptr + 1) - s->lex.bufstart);
-					}
-
+					s->lex.esc[s->lex.esclen++] = 'o';
+					s->lex.esc[s->lex.esclen++] = *ptr;
 					break;
 
 				default:
@@ -549,7 +536,7 @@ parse_string(struct uc_state *s)
 							}
 
 							s->lex.esclen = 4;
-							buf_consume(s, ptr - s->lex.bufstart);
+							buf_consume(s, ptr-- - s->lex.bufstart);
 						}
 
 						/* append */
diff --git a/tests/00_syntax/09_string_literals b/tests/00_syntax/09_string_literals
index 381076e..0967850 100644
--- a/tests/00_syntax/09_string_literals
+++ b/tests/00_syntax/09_string_literals
@@ -28,16 +28,23 @@ Octal escape: ABC xyz
 Testing various parsing corner cases.
 
 -- Expect stdout --
-[ "\t", "\n", "y" ]
+[ "\t", "\n", "y", "\u0001", "\n", "\u0001\u0002", "\u0001\u0002", "\u0001\u0002", "\u0001a", "\na" ]
 -- End --
 
 -- Testcase --
 {%
 	print([
-		"\	",  // properly handle escaped tab
+		"\	",  	// properly handle escaped tab
 		"\
-",  			// properly handle escaped newline
-		"\y"	// substitute unrecognized escape with escaped char
+",  				// properly handle escaped newline
+		"\y",		// substitute unrecognized escape with escaped char
+		"\1",		// handle short octal sequence at end of string
+		"\12",		// handle short octal sequence at end of string
+		"\1\2",		// handle subsequent short octal sequences
+		"\001\2",	// handle short sequence after long one
+		"\1\002",	// handle long sequence after short one
+		"\1a",		// handle short octal sequence terminated by non-octal char
+		"\12a"		// handle short octal sequence terminated by non-octal char
 	], "\n");
 %}
 -- End --
author	Jo-Philipp Wich <jo@mein.io>	2020-11-30 19:23:01 +0100
committer	Jo-Philipp Wich <jo@mein.io>	2020-11-30 19:23:01 +0100
commit	7caf3331ba7d8f0dd91659e4001c9d8256bf4558 (patch)
tree	ba6a32a3eb675fca44f0fc50d280037f1e70a4df
parent	b94c01f29408600721c7e3302392e9015fc7bafd (diff)