diff options
author | Jo-Philipp Wich <jo@mein.io> | 2022-07-14 14:33:12 +0200 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2022-07-28 13:18:30 +0200 |
commit | 03c8e4b465c8cffd2596d2741b29ad2ba4ec1765 (patch) | |
tree | 6a43c9f54be5e3de4fcbc73b5ebaa518e642d3ad /include | |
parent | 1219d7efa170bf38fb1bf6a10fa0d1f96e62f091 (diff) |
lexer: rewrite token scanner
- Use nested switches instead of lookup tables to detect tokens
- Simplify input buffer logic
- Reduce amount of intermediate states
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
Diffstat (limited to 'include')
-rw-r--r-- | include/ucode/lexer.h | 23 |
1 file changed, 8 insertions, 15 deletions
```diff
diff --git a/include/ucode/lexer.h b/include/ucode/lexer.h
index 835bc2b..dbec129 100644
--- a/include/ucode/lexer.h
+++ b/include/ucode/lexer.h
@@ -124,14 +124,11 @@ typedef enum {
 
 typedef enum {
 	UC_LEX_IDENTIFY_BLOCK,
-	UC_LEX_BLOCK_COMMENT_START,
-	UC_LEX_BLOCK_EXPRESSION_START,
 	UC_LEX_BLOCK_EXPRESSION_EMIT_TAG,
-	UC_LEX_BLOCK_STATEMENT_START,
 	UC_LEX_BLOCK_COMMENT,
 	UC_LEX_IDENTIFY_TOKEN,
-	UC_LEX_PARSE_TOKEN,
-	UC_LEX_PLACEHOLDER,
+	UC_LEX_PLACEHOLDER_START,
+	UC_LEX_PLACEHOLDER_END,
 	UC_LEX_EOF
 } uc_lex_state_t;
 
@@ -145,19 +142,9 @@ typedef struct {
 	uc_lex_state_t state;
 	uc_parse_config_t *config;
 	uc_source_t *source;
-	uint8_t eof:1;
-	uint8_t is_escape:1;
-	uint8_t is_placeholder:1;
 	uint8_t no_regexp:1;
 	uint8_t no_keyword:1;
-	size_t buflen;
-	char *buf, *bufstart, *bufend;
-	size_t lookbehindlen;
-	char *lookbehind;
-	const void *tok;
 	uc_token_t curr;
-	char esc[5];
-	uint8_t esclen;
 	int lead_surrogate;
 	size_t lastoff;
 	enum {
@@ -176,6 +163,12 @@ typedef struct {
 		size_t count;
 		size_t *entries;
 	} templates;
+	struct {
+		size_t count;
+		char *entries;
+	} buffer;
+	unsigned char *rbuf;
+	size_t rlen, rpos;
 } uc_lexer_t;
 
 
```