summary | refs | log | tree | commit | diff | homepage
path: root/include
diff options
context:
space:
mode:
author: Jo-Philipp Wich <jo@mein.io> 2022-07-14 14:33:12 +0200
committer: Jo-Philipp Wich <jo@mein.io> 2022-07-28 13:18:30 +0200
commit: 03c8e4b465c8cffd2596d2741b29ad2ba4ec1765 (patch)
tree: 6a43c9f54be5e3de4fcbc73b5ebaa518e642d3ad /include
parent: 1219d7efa170bf38fb1bf6a10fa0d1f96e62f091 (diff)
lexer: rewrite token scanner
- Use nested switches instead of lookup tables to detect tokens
- Simplify input buffer logic
- Reduce amount of intermediate states

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
Diffstat (limited to 'include')
-rw-r--r-- include/ucode/lexer.h | 23
1 files changed, 8 insertions, 15 deletions
diff --git a/include/ucode/lexer.h b/include/ucode/lexer.h
index 835bc2b..dbec129 100644
--- a/include/ucode/lexer.h
+++ b/include/ucode/lexer.h
@@ -124,14 +124,11 @@ typedef enum {
typedef enum {
UC_LEX_IDENTIFY_BLOCK,
- UC_LEX_BLOCK_COMMENT_START,
- UC_LEX_BLOCK_EXPRESSION_START,
UC_LEX_BLOCK_EXPRESSION_EMIT_TAG,
- UC_LEX_BLOCK_STATEMENT_START,
UC_LEX_BLOCK_COMMENT,
UC_LEX_IDENTIFY_TOKEN,
- UC_LEX_PARSE_TOKEN,
- UC_LEX_PLACEHOLDER,
+ UC_LEX_PLACEHOLDER_START,
+ UC_LEX_PLACEHOLDER_END,
UC_LEX_EOF
} uc_lex_state_t;
@@ -145,19 +142,9 @@ typedef struct {
uc_lex_state_t state;
uc_parse_config_t *config;
uc_source_t *source;
- uint8_t eof:1;
- uint8_t is_escape:1;
- uint8_t is_placeholder:1;
uint8_t no_regexp:1;
uint8_t no_keyword:1;
- size_t buflen;
- char *buf, *bufstart, *bufend;
- size_t lookbehindlen;
- char *lookbehind;
- const void *tok;
uc_token_t curr;
- char esc[5];
- uint8_t esclen;
int lead_surrogate;
size_t lastoff;
enum {
@@ -176,6 +163,12 @@ typedef struct {
size_t count;
size_t *entries;
} templates;
+ struct {
+ size_t count;
+ char *entries;
+ } buffer;
+ unsigned char *rbuf;
+ size_t rlen, rpos;
} uc_lexer_t;