summary | refs | log | tree | commit | diff | homepage
path: root/lexer.h
diff options
context:
space:
mode:
author: Jo-Philipp Wich <jo@mein.io>, 2021-02-17 18:28:01 +0100
committer: GitHub <noreply@github.com>, 2021-02-17 18:28:01 +0100
commit: 679270fd3afa93cca84ab31b5041922037fec0c5 (patch)
tree: e55752bae52bf7eed38b91c42e990a8b116b6621 /lexer.h
parent: 77580a893283f2bde7ab46496bd3a3d7b2fc6784 (diff)
parent: 14e46b8e225dc329f4e14777960b10abb8a09699 (diff)
Merge pull request #2 from jow-/rewrite
treewide: rewrite ucode interpreter
Diffstat (limited to 'lexer.h')
-rw-r--r-- lexer.h 150
1 files changed, 144 insertions, 6 deletions
diff --git a/lexer.h b/lexer.h
index e45d3e3..fd462ff 100644
--- a/lexer.h
+++ b/lexer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io>
+ * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -17,18 +17,156 @@
#ifndef __LEXER_H_
#define __LEXER_H_
-#include "ast.h"
+#include "source.h"
#define __T_MAX 82
#define T_EXCEPTION (__T_MAX + 0)
#define T_CFUNC (__T_MAX + 1)
#define T_RESSOURCE (__T_MAX + 2)
-bool
-utf8enc(char **out, int *rem, int code);
-uint32_t
-uc_get_token(struct uc_state *s, FILE *fp);
+typedef enum { /* Token type codes; used as the type of uc_token.type. Values are assigned sequentially. */
+ TK_LEXP = 1, /* numbering starts at 1, so 0 is not a member of this enum */
+ TK_REXP,
+ TK_LSTM,
+ TK_RSTM,
+ TK_IF,
+ TK_ELSE,
+ TK_COMMA,
+ TK_ASBAND,
+ TK_ASBXOR,
+ TK_ASBOR,
+ TK_ASLEFT,
+ TK_ASRIGHT,
+ TK_ASMUL,
+ TK_ASDIV,
+ TK_ASMOD,
+ TK_ASADD,
+ TK_ASSUB,
+ TK_ASSIGN,
+ TK_QMARK,
+ TK_COLON,
+ TK_OR,
+ TK_AND,
+ TK_BOR,
+ TK_BXOR,
+ TK_BAND,
+ TK_EQ,
+ TK_NE,
+ TK_EQS,
+ TK_NES,
+ TK_LT,
+ TK_LE,
+ TK_GT,
+ TK_GE,
+ TK_IN,
+ TK_LSHIFT,
+ TK_RSHIFT,
+ TK_ADD,
+ TK_SUB,
+ TK_MUL,
+ TK_DIV,
+ TK_MOD,
+ TK_NOT,
+ TK_COMPL,
+ TK_INC,
+ TK_DEC,
+ TK_LPAREN,
+ TK_LBRACK,
+ TK_TEXT,
+ TK_LBRACE,
+ TK_RBRACE,
+ TK_SCOL,
+ TK_RPAREN,
+ TK_ENDIF,
+ TK_ELIF,
+ TK_WHILE,
+ TK_ENDWHILE,
+ TK_FOR,
+ TK_ENDFOR,
+ TK_FUNC,
+ TK_LABEL,
+ TK_ENDFUNC,
+ TK_TRY,
+ TK_CATCH,
+ TK_SWITCH,
+ TK_CASE,
+ TK_DEFAULT,
+ TK_ELLIP,
+ TK_RETURN,
+ TK_BREAK,
+ TK_CONTINUE,
+ TK_LOCAL,
+ TK_ARROW,
+ TK_DOT,
+ TK_RBRACK,
+ TK_BOOL,
+ TK_NUMBER,
+ TK_DOUBLE,
+ TK_STRING,
+ TK_REGEXP,
+ TK_NULL,
+ TK_THIS, /* evaluates to 81 */
+
+ TK_EOF, /* evaluates to 82, the same number as __T_MAX / T_EXCEPTION - NOTE(review): confirm the overlap is harmless; presumably signals end of input */
+ TK_ERROR /* evaluates to 83; presumably signals a lexing failure - TODO confirm against the lexer implementation */
+} uc_tokentype_t;
+
+typedef enum { /* Lexer state codes; used as the type of uc_lexer.state. NOTE(review): enumerators use a UT_ prefix while the types use uc_ */
+ UT_LEX_IDENTIFY_BLOCK, /* evaluates to 0 (first enumerator, no explicit value) */
+ UT_LEX_BLOCK_COMMENT_START,
+ UT_LEX_BLOCK_EXPRESSION_START,
+ UT_LEX_BLOCK_EXPRESSION_EMIT_TAG,
+ UT_LEX_BLOCK_STATEMENT_START,
+ UT_LEX_BLOCK_COMMENT,
+ UT_LEX_IDENTIFY_TOKEN,
+ UT_LEX_PARSE_TOKEN,
+ UT_LEX_EOF /* evaluates to 8 */
+} uc_lex_state_t;
+
+typedef struct { /* A single token; uc_lexer embeds one as `curr` and uc_lexer_next_token() returns a pointer to this type */
+ uc_tokentype_t type; /* one of the TK_* codes */
+ json_object *val; /* value attached to the token; presumably the literal or identifier payload, ownership not visible here - TODO confirm */
+ size_t pos; /* presumably the token's offset within the source - TODO confirm */
+} uc_token;
+
+typedef struct { /* Boolean parse options; passed by pointer to uc_lexer_init(), and uc_lexer holds a pointer to one in `config` */
+ bool lstrip_blocks; /* presumably strips whitespace preceding a block tag - TODO confirm against the lexer implementation */
+ bool trim_blocks; /* presumably drops the newline following a block tag - TODO confirm against the lexer implementation */
+ bool strict_declarations; /* presumably enforces explicit variable declarations - TODO confirm against the compiler/parser */
+} uc_parse_config;
+
+typedef struct { /* Lexer context; handled via uc_lexer_init(), uc_lexer_next_token() and uc_lexer_free() */
+ uc_lex_state_t state; /* current state, one of the UT_LEX_* codes */
+ uc_parse_config *config; /* pointer to parse options; not owned as far as this header shows - TODO confirm */
+ uc_source *source; /* input source (type declared in source.h) */
+ uint8_t eof:1; /* first of eight 1-bit flags; NOTE(review): uint8_t as a bit-field type is implementation-defined in ISO C */
+ uint8_t skip_leading_whitespace:1;
+ uint8_t skip_leading_newline:1;
+ uint8_t within_expression_block:1;
+ uint8_t within_statement_block:1;
+ uint8_t semicolon_emitted:1;
+ uint8_t expect_div:1; /* presumably disambiguates '/' as division vs. regexp start - TODO confirm */
+ uint8_t is_escape:1;
+ size_t buflen; /* presumably the capacity of `buf` - TODO confirm */
+ char *buf, *bufstart, *bufend; /* three pointers related to the input buffer; exact roles not visible in this header */
+ size_t lookbehindlen; /* presumably the length of `lookbehind` - TODO confirm */
+ char *lookbehind;
+ const void *tok; /* opaque pointer; what it refers to is not visible in this header */
+ uc_token curr; /* embedded token storage; presumably what uc_lexer_next_token() returns a pointer to - TODO confirm */
+ char esc[5]; /* 5-byte scratch array; presumably collects escape-sequence characters - TODO confirm */
+ uint8_t esclen; /* presumably the number of bytes used in `esc` - TODO confirm */
+ int lead_surrogate; /* presumably a pending UTF-16 lead surrogate awaiting its trail unit - TODO confirm */
+ size_t lastoff;
+} uc_lexer;
+
+
+void uc_lexer_init(uc_lexer *lex, uc_parse_config *config, uc_source *source); /* set up `lex` for the given config and source; no status is returned */
+void uc_lexer_free(uc_lexer *lex); /* counterpart to uc_lexer_init(); presumably releases buffers held by `lex`, not `lex` itself - TODO confirm */
+
+uc_token *uc_lexer_next_token(uc_lexer *lex); /* returns a pointer to a uc_token; presumably &lex->curr, so valid only until the next call - TODO confirm */
+
+bool utf8enc(char **out, int *rem, int code); /* presumably writes `code` as UTF-8 at *out, advancing *out and decreasing *rem - TODO confirm */
const char *
uc_get_tokenname(int type); /* returns a constant string for a token type code; behaviour for unknown codes is not visible here */