diff options
author | Jo-Philipp Wich <jo@mein.io> | 2021-02-17 18:28:01 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-17 18:28:01 +0100 |
commit | 679270fd3afa93cca84ab31b5041922037fec0c5 (patch) | |
tree | e55752bae52bf7eed38b91c42e990a8b116b6621 /lexer.h | |
parent | 77580a893283f2bde7ab46496bd3a3d7b2fc6784 (diff) | |
parent | 14e46b8e225dc329f4e14777960b10abb8a09699 (diff) |
Merge pull request #2 from jow-/rewrite
treewide: rewrite ucode interpreter
Diffstat (limited to 'lexer.h')
-rw-r--r-- | lexer.h | 150 |
1 files changed, 144 insertions, 6 deletions
```diff
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io>
+ * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io>
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -17,18 +17,156 @@
 #ifndef __LEXER_H_
 #define __LEXER_H_
 
-#include "ast.h"
+#include "source.h"
 
 #define __T_MAX 82
 #define T_EXCEPTION (__T_MAX + 0)
 #define T_CFUNC (__T_MAX + 1)
 #define T_RESSOURCE (__T_MAX + 2)
 
-bool
-utf8enc(char **out, int *rem, int code);
 
-uint32_t
-uc_get_token(struct uc_state *s, FILE *fp);
+typedef enum {
+	TK_LEXP = 1,
+	TK_REXP,
+	TK_LSTM,
+	TK_RSTM,
+	TK_IF,
+	TK_ELSE,
+	TK_COMMA,
+	TK_ASBAND,
+	TK_ASBXOR,
+	TK_ASBOR,
+	TK_ASLEFT,
+	TK_ASRIGHT,
+	TK_ASMUL,
+	TK_ASDIV,
+	TK_ASMOD,
+	TK_ASADD,
+	TK_ASSUB,
+	TK_ASSIGN,
+	TK_QMARK,
+	TK_COLON,
+	TK_OR,
+	TK_AND,
+	TK_BOR,
+	TK_BXOR,
+	TK_BAND,
+	TK_EQ,
+	TK_NE,
+	TK_EQS,
+	TK_NES,
+	TK_LT,
+	TK_LE,
+	TK_GT,
+	TK_GE,
+	TK_IN,
+	TK_LSHIFT,
+	TK_RSHIFT,
+	TK_ADD,
+	TK_SUB,
+	TK_MUL,
+	TK_DIV,
+	TK_MOD,
+	TK_NOT,
+	TK_COMPL,
+	TK_INC,
+	TK_DEC,
+	TK_LPAREN,
+	TK_LBRACK,
+	TK_TEXT,
+	TK_LBRACE,
+	TK_RBRACE,
+	TK_SCOL,
+	TK_RPAREN,
+	TK_ENDIF,
+	TK_ELIF,
+	TK_WHILE,
+	TK_ENDWHILE,
+	TK_FOR,
+	TK_ENDFOR,
+	TK_FUNC,
+	TK_LABEL,
+	TK_ENDFUNC,
+	TK_TRY,
+	TK_CATCH,
+	TK_SWITCH,
+	TK_CASE,
+	TK_DEFAULT,
+	TK_ELLIP,
+	TK_RETURN,
+	TK_BREAK,
+	TK_CONTINUE,
+	TK_LOCAL,
+	TK_ARROW,
+	TK_DOT,
+	TK_RBRACK,
+	TK_BOOL,
+	TK_NUMBER,
+	TK_DOUBLE,
+	TK_STRING,
+	TK_REGEXP,
+	TK_NULL,
+	TK_THIS,
+
+	TK_EOF,
+	TK_ERROR
+} uc_tokentype_t;
+
+typedef enum {
+	UT_LEX_IDENTIFY_BLOCK,
+	UT_LEX_BLOCK_COMMENT_START,
+	UT_LEX_BLOCK_EXPRESSION_START,
+	UT_LEX_BLOCK_EXPRESSION_EMIT_TAG,
+	UT_LEX_BLOCK_STATEMENT_START,
+	UT_LEX_BLOCK_COMMENT,
+	UT_LEX_IDENTIFY_TOKEN,
+	UT_LEX_PARSE_TOKEN,
+	UT_LEX_EOF
+} uc_lex_state_t;
+
+typedef struct {
+	uc_tokentype_t type;
+	json_object *val;
+	size_t pos;
+} uc_token;
+
+typedef struct {
+	bool lstrip_blocks;
+	bool trim_blocks;
+	bool strict_declarations;
+} uc_parse_config;
+
+typedef struct {
+	uc_lex_state_t state;
+	uc_parse_config *config;
+	uc_source *source;
+	uint8_t eof:1;
+	uint8_t skip_leading_whitespace:1;
+	uint8_t skip_leading_newline:1;
+	uint8_t within_expression_block:1;
+	uint8_t within_statement_block:1;
+	uint8_t semicolon_emitted:1;
+	uint8_t expect_div:1;
+	uint8_t is_escape:1;
+	size_t buflen;
+	char *buf, *bufstart, *bufend;
+	size_t lookbehindlen;
+	char *lookbehind;
+	const void *tok;
+	uc_token curr;
+	char esc[5];
+	uint8_t esclen;
+	int lead_surrogate;
+	size_t lastoff;
+} uc_lexer;
+
+
+void uc_lexer_init(uc_lexer *lex, uc_parse_config *config, uc_source *source);
+void uc_lexer_free(uc_lexer *lex);
+
+uc_token *uc_lexer_next_token(uc_lexer *lex);
+
+bool utf8enc(char **out, int *rem, int code);
 
 const char *
 uc_get_tokenname(int type);
```