summaryrefslogtreecommitdiffhomepage
path: root/lexer.c
diff options
context:
space:
mode:
authorJo-Philipp Wich <jo@mein.io>2021-03-11 20:03:31 +0100
committerJo-Philipp Wich <jo@mein.io>2021-03-11 20:03:31 +0100
commitf2b6b24975a3e8ad7e8bfb16d2449ba8e3d2536a (patch)
tree7dd9f61f220c73d2027008fd753632fb2354909d /lexer.c
parent98f06f4e475bc1ec4969ae9e770426b045743f14 (diff)
lexer: fix infinite loop in lineinfo encoding when consuming large chunks
A logic flaw in the lineinfo encoding function led to an infinite tight loop when a buffer chunk of 128 bytes or more was consumed, which may happen when parsing very long literals. Signed-off-by: Jo-Philipp Wich <jo@mein.io>
Diffstat (limited to 'lexer.c')
-rw-r--r--lexer.c52
1 files changed, 16 insertions, 36 deletions
diff --git a/lexer.c b/lexer.c
index 0935494..bf099f5 100644
--- a/lexer.c
+++ b/lexer.c
@@ -277,40 +277,19 @@ _buf_startswith(uc_lexer *lex, const char *str, size_t len) {
#define buf_startswith(s, str) _buf_startswith(s, str, sizeof(str) - 1)
-#if 0
-static void add_lineinfo(struct uc_state *s, size_t off)
-{
- uc_lineinfo *lines = &s->source->lineinfo;
- size_t linelen;
-
- linelen = off - s->lex.lastlineoff;
-
- /* lineinfo is encoded in bytes: the most significant bit specifies whether
- * to advance the line count by one or not, while the remaining 7 bits encode
- * the amounts of bytes on the current line.
- *
- * If a line has more than 127 characters, the first byte will be set to
- * 0xff (1 1111111) and subsequent bytes will encode the remaining characters
- * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus
- * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:1111111).
- *
- * The newline character itself is not counted, so an empty line is encoded as
- * 0x80 (1:0000000).
- */
- uc_vector_grow(lines);
- lines->entries[lines->count++] = 0x80 + (linelen & 0x7f);
- linelen -= (linelen & 0x7f);
-
- while (linelen > 0) {
- uc_vector_grow(lines);
- lines->entries[lines->count++] = (linelen & 0x7f);
- linelen -= (linelen & 0x7f);
- }
- s->lex.lastlineoff = off + 1;
- s->lex.line++;
-}
-#endif
+/* lineinfo is encoded in bytes: the most significant bit specifies whether
+ * to advance the line count by one or not, while the remaining 7 bits encode
+ * the number of bytes on the current line.
+ *
+ * If a line has more than 127 characters, the first byte will be set to
+ * 0xff (1 1111111) and subsequent bytes will encode the remaining characters
+ * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus
+ * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:0010011).
+ *
+ * The newline character itself is not counted, so an empty line is encoded as
+ * 0x80 (1:0000000).
+ */
static void
next_lineinfo(uc_lexer *lex)
@@ -325,7 +304,7 @@ static void
update_lineinfo(uc_lexer *lex, size_t off)
{
uc_lineinfo *lines = &lex->source->lineinfo;
- uint8_t *entry;
+ uint8_t *entry, n;
entry = uc_vector_last(lines);
@@ -337,10 +316,11 @@ update_lineinfo(uc_lexer *lex, size_t off)
entry[0] |= 0x7f;
while (off > 0) {
+ n = (off > 0x7f) ? 0x7f : off;
uc_vector_grow(lines);
entry = uc_vector_last(lines);
- entry[1] = (off & 0x7f);
- off -= (off & 0x7f);
+ entry[1] = n;
+ off -= n;
lines->count++;
}
}