summary | refs | log | tree | commit | diff | homepage
path: root/ast.h
diff options
context:
space:
mode:
author: Jo-Philipp Wich <jo@mein.io> 2020-10-09 16:01:31 +0200
committer: Jo-Philipp Wich <jo@mein.io> 2020-10-14 12:09:28 +0200
commit: 6ad05263426e6f4aae4665d52b9ed1962ab4cd24 (patch)
tree: 7b73f563e291eeab944071e0c9a3b9128e924c6b /ast.h
parent: 4d1c4e28b8d8368a105717e142f8e920cbf4ea0f (diff)
lexer: rewrite
Rewrite the lexer into a restartable state machine to support parsing from file streams without the need to read the entire source text into memory first. As a side effect, the length of labels and strings is unlimited now.

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
Diffstat (limited to 'ast.h')
-rw-r--r-- ast.h 41
1 file changed, 31 insertions, 10 deletions
diff --git a/ast.h b/ast.h
index 339c178..1d825c7 100644
--- a/ast.h
+++ b/ast.h
@@ -23,6 +23,7 @@
#include <stdio.h>
#include <stdbool.h>
#include <stdarg.h>
+#include <string.h>
#ifdef JSONC
#include <json.h>
@@ -47,11 +48,16 @@ enum ut_error_type {
UT_ERROR_EXCEPTION
};
-enum ut_block_type {
- UT_BLOCK_NONE,
- UT_BLOCK_STATEMENT,
- UT_BLOCK_EXPRESSION,
- UT_BLOCK_COMMENT
+enum ut_lex_state {
+ UT_LEX_IDENTIFY_BLOCK,
+ UT_LEX_BLOCK_COMMENT_START,
+ UT_LEX_BLOCK_EXPRESSION_START,
+ UT_LEX_BLOCK_EXPRESSION_EMIT_TAG,
+ UT_LEX_BLOCK_STATEMENT_START,
+ UT_LEX_BLOCK_COMMENT,
+ UT_LEX_IDENTIFY_TOKEN,
+ UT_LEX_PARSE_TOKEN,
+ UT_LEX_EOF
};
struct ut_op {
@@ -98,16 +104,31 @@ struct ut_state {
struct ut_op *pool;
uint32_t poolsize;
uint32_t main;
- uint8_t semicolon_emitted:1;
- uint8_t start_tag_seen:1;
uint8_t srand_called:1;
uint8_t trim_blocks:1;
uint8_t lstrip_blocks:1;
uint8_t strict_declarations:1;
uint8_t skip_shebang:1;
- uint8_t expect_div:1;
- size_t off;
- enum ut_block_type blocktype;
+ struct {
+ enum ut_lex_state state;
+ uint8_t eof:1;
+ uint8_t skip_leading_whitespace:1;
+ uint8_t skip_leading_newline:1;
+ uint8_t within_expression_block:1;
+ uint8_t within_statement_block:1;
+ uint8_t semicolon_emitted:1;
+ uint8_t expect_div:1;
+ uint8_t is_escape:1;
+ size_t buflen;
+ char *buf, *bufstart, *bufend;
+ size_t lookbehindlen;
+ char *lookbehind;
+ const void *tok;
+ char esc[5];
+ uint8_t esclen;
+ int lead_surrogate;
+ size_t off, lastoff;
+ } lex;
struct {
enum ut_error_type code;
union {