diff options
-rw-r--r-- | compiler.c | 110 | ||||
-rw-r--r-- | lexer.c | 79 | ||||
-rw-r--r-- | lexer.h | 4 |
3 files changed, 97 insertions, 96 deletions
@@ -195,64 +195,11 @@ uc_compiler_set_srcpos(uc_compiler *compiler, size_t srcpos) static void uc_compiler_parse_advance(uc_compiler *compiler) { - bool no_regexp; - ucv_put(compiler->parser->prev.uv); compiler->parser->prev = compiler->parser->curr; while (true) { - /* Follow JSLint logic and treat a slash after any of the - * `(,=:[!&|?{};` characters as the beginning of a regex - * literal... */ - switch (compiler->parser->prev.type) { - case TK_LPAREN: - case TK_COMMA: - - case TK_ASADD: - case TK_ASBAND: - case TK_ASBOR: - case TK_ASBXOR: - case TK_ASDIV: - case TK_ASLEFT: - case TK_ASMOD: - case TK_ASMUL: - case TK_ASRIGHT: - case TK_ASSIGN: - case TK_ASSUB: - case TK_EQ: - case TK_EQS: - case TK_GE: - case TK_LE: - case TK_NE: - case TK_NES: - - case TK_COLON: - case TK_LBRACK: - case TK_NOT: - - case TK_AND: - case TK_BAND: - - case TK_OR: - case TK_BOR: - - case TK_QMARK: - - case TK_LBRACE: - case TK_RBRACE: - - case TK_LSTM: - case TK_LEXP: - - case TK_SCOL: - no_regexp = false; - break; - - default: - no_regexp = (compiler->parser->prev.type != 0); - } - - compiler->parser->curr = *uc_lexer_next_token(&compiler->parser->lex, no_regexp); + compiler->parser->curr = *uc_lexer_next_token(&compiler->parser->lex); if (compiler->parser->curr.type != TK_ERROR) break; @@ -359,22 +306,41 @@ uc_compiler_parse_precedence(uc_compiler *compiler, uc_precedence_t precedence) uc_parse_rule *rule; bool assignable; - uc_compiler_parse_advance(compiler); - - rule = uc_compiler_parse_rule(compiler->parser->prev.type); + rule = uc_compiler_parse_rule(compiler->parser->curr.type); if (!rule->prefix) { - uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, "Expecting expression"); + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, "Expecting expression"); + uc_compiler_parse_advance(compiler); return; } + /* allow reserved words as property names in object literals */ + if (rule->prefix == uc_compiler_compile_object) + compiler->parser->lex.no_keyword = true; + + /* unless a sub-expression follows, treat subsequent slash as division + * operator and not as beginning of regexp literal */ + if (rule->prefix != uc_compiler_compile_paren && + rule->prefix != uc_compiler_compile_unary && + rule->prefix != uc_compiler_compile_array) + compiler->parser->lex.no_regexp = true; + + uc_compiler_parse_advance(compiler); + assignable = (precedence <= P_ASSIGN); rule->prefix(compiler, assignable); while (precedence <= uc_compiler_parse_rule(compiler->parser->curr.type)->precedence) { + rule = uc_compiler_parse_rule(compiler->parser->curr.type); + + /* allow reserved words in property accessors */ + if (rule->infix == uc_compiler_compile_dot) + compiler->parser->lex.no_keyword = true; + uc_compiler_parse_advance(compiler); - uc_compiler_parse_rule(compiler->parser->prev.type)->infix(compiler, assignable); + + rule->infix(compiler, assignable); } if (assignable && uc_compiler_parse_at_assignment_op(compiler)) @@ -1207,7 +1173,14 @@ uc_compiler_compile_paren(uc_compiler *compiler, bool assignable) continue; } else { - maybe_arrowfn = uc_compiler_parse_match(compiler, TK_RPAREN); + maybe_arrowfn = uc_compiler_parse_check(compiler, TK_RPAREN); + + if (maybe_arrowfn) { + /* A subsequent slash cannot be a regular expression literal */ + compiler->parser->lex.no_regexp = true; + uc_compiler_parse_advance(compiler); + } + break; } } @@ -1276,6 +1249,9 @@ uc_compiler_compile_paren(uc_compiler *compiler, bool assignable) if (!uc_compiler_parse_check(compiler, TK_RPAREN)) uc_compiler_compile_expression(compiler); + /* A subsequent slash cannot be a regular expression literal */ + compiler->parser->lex.no_regexp = true; + /* At this point we expect the end of the parenthesized expression, anything * else is a syntax error */ uc_compiler_parse_consume(compiler, TK_RPAREN); @@ -1315,6 +1291,8 @@ uc_compiler_compile_call(uc_compiler *compiler, bool assignable) while (uc_compiler_parse_match(compiler, TK_COMMA)); } + /* after a function call expression, no regexp literal can follow */ + compiler->parser->lex.no_regexp = true; uc_compiler_parse_consume(compiler, TK_RPAREN); /* if lhs is a dot or bracket expression, emit a method call */ @@ -1564,6 +1542,9 @@ uc_compiler_compile_or(uc_compiler *compiler, bool assignable) static void uc_compiler_compile_dot(uc_compiler *compiler, bool assignable) { + /* no regexp literal possible after property access */ + compiler->parser->lex.no_regexp = true; + /* parse label lhs */ uc_compiler_parse_consume(compiler, TK_LABEL); uc_compiler_emit_constant(compiler, compiler->parser->prev.pos, compiler->parser->prev.uv); @@ -1578,6 +1559,9 @@ uc_compiler_compile_subscript(uc_compiler *compiler, bool assignable) { /* compile lhs */ uc_compiler_compile_expression(compiler); + + /* no regexp literal possible after computed property access */ + compiler->parser->lex.no_regexp = true; uc_compiler_parse_consume(compiler, TK_RBRACK); /* depending on context, compile into I_UVAL, I_SVAL or I_LVAL operation */ @@ -1653,6 +1637,8 @@ uc_compiler_compile_array(uc_compiler *compiler, bool assignable) } while (uc_compiler_parse_match(compiler, TK_COMMA)); + /* no regexp literal possible after array literal */ + compiler->parser->lex.no_regexp = true; uc_compiler_parse_consume(compiler, TK_RBRACK); /* push items on stack */ @@ -1751,9 +1737,13 @@ uc_compiler_compile_object(uc_compiler *compiler, bool assignable) hint_count += 2; len += 2; + + compiler->parser->lex.no_keyword = true; } while (uc_compiler_parse_match(compiler, TK_COMMA)); + /* no regexp literal possible after object literal */ + compiler->parser->lex.no_regexp = true; uc_compiler_parse_consume(compiler, TK_RBRACE); /* set items on stack */ @@ -48,7 +48,7 @@ struct token { char pat[4]; } u; unsigned plen; - uc_token *(*parse)(uc_lexer *, bool); + uc_token *(*parse)(uc_lexer *); }; #define dec(o) \ @@ -58,11 +58,11 @@ struct token { (((x) >= 'a') ? (10 + (x) - 'a') : \ (((x) >= 'A') ? (10 + (x) - 'A') : dec(x))) -static uc_token *parse_comment(uc_lexer *, bool); -static uc_token *parse_string(uc_lexer *, bool); -static uc_token *parse_regexp(uc_lexer *, bool); -static uc_token *parse_number(uc_lexer *, bool); -static uc_token *parse_label(uc_lexer *, bool); +static uc_token *parse_comment(uc_lexer *); +static uc_token *parse_string(uc_lexer *); +static uc_token *parse_regexp(uc_lexer *); +static uc_token *parse_number(uc_lexer *); +static uc_token *parse_label(uc_lexer *); static const struct token tokens[] = { { TK_ASLEFT, { .pat = "<<=" }, 3, NULL }, @@ -353,7 +353,7 @@ buf_consume(uc_lexer *lex, size_t len) { } static uc_token * -parse_comment(uc_lexer *lex, bool no_regexp) +parse_comment(uc_lexer *lex) { const struct token *tok = lex->tok; const char *ptr, *end; @@ -397,7 +397,7 @@ append_utf8(uc_lexer *lex, int code) { } static uc_token * -parse_string(uc_lexer *lex, bool no_regexp) +parse_string(uc_lexer *lex) { const struct token *tok = lex->tok; char q = tok->u.pat[0]; @@ -625,7 +625,7 @@ enum { }; static uc_token * -parse_regexp(uc_lexer *lex, bool no_regexp) +parse_regexp(uc_lexer *lex) { bool is_reg_global = false, is_reg_icase = false, is_reg_newline = false; uc_token *rv; @@ -634,7 +634,7 @@ parse_regexp(uc_lexer *lex, bool no_regexp) switch (lex->esc[0]) { case UT_LEX_PARSE_REGEX_INIT: - if (no_regexp) { + if (lex->no_regexp) { if (buf_startswith(lex, "=")) { buf_consume(lex, 1); @@ -648,7 +648,7 @@ parse_regexp(uc_lexer *lex, bool no_regexp) break; case UT_LEX_PARSE_REGEX_PATTERN: - rv = parse_string(lex, no_regexp); + rv = parse_string(lex); if (rv && rv->type == TK_ERROR) return rv; @@ -716,7 +716,7 @@ parse_regexp(uc_lexer *lex, bool no_regexp) */ static uc_token * -parse_label(uc_lexer *lex, bool no_regexp) +parse_label(uc_lexer *lex) { const struct token *tok = lex->tok; const struct keyword *word; @@ -728,24 +728,26 @@ parse_label(uc_lexer *lex, bool no_regexp) lookbehind_append(lex, tok->u.pat, tok->plen); if (!buf_remaining(lex) || (lex->bufstart[0] != '_' && !isalnum(lex->bufstart[0]))) { - for (i = 0, word = &reserved_words[0]; i < ARRAY_SIZE(reserved_words); i++, word = &reserved_words[i]) { - if (lex->lookbehind && lex->lookbehindlen == word->plen && !strncmp(lex->lookbehind, word->pat, word->plen)) { - lookbehind_reset(lex); - - switch (word->type) { - case TK_DOUBLE: - rv = emit_op(lex, lex->source->off - word->plen, word->type, ucv_double_new(word->u.d)); - break; - - case TK_BOOL: - rv = emit_op(lex, lex->source->off - word->plen, word->type, ucv_boolean_new(word->u.b)); - break; + if (lex->no_keyword == false) { + for (i = 0, word = &reserved_words[0]; i < ARRAY_SIZE(reserved_words); i++, word = &reserved_words[i]) { + if (lex->lookbehind && lex->lookbehindlen == word->plen && !strncmp(lex->lookbehind, word->pat, word->plen)) { + lookbehind_reset(lex); + + switch (word->type) { + case TK_DOUBLE: + rv = emit_op(lex, lex->source->off - word->plen, word->type, ucv_double_new(word->u.d)); + break; + + case TK_BOOL: + rv = emit_op(lex, lex->source->off - word->plen, word->type, ucv_boolean_new(word->u.b)); + break; + + default: + rv = emit_op(lex, lex->source->off - word->plen, word->type, NULL); + } - default: - rv = emit_op(lex, lex->source->off - word->plen, word->type, NULL); + return rv; } - - return rv; } } @@ -784,7 +786,7 @@ is_numeric_char(uc_lexer *lex, char c) } static uc_token * -parse_number(uc_lexer *lex, bool no_regexp) +parse_number(uc_lexer *lex) { const struct token *tok = lex->tok; uc_token *rv = NULL; @@ -837,7 +839,7 @@ parse_number(uc_lexer *lex, bool no_regexp) } static uc_token * -lex_step(uc_lexer *lex, FILE *fp, bool no_regexp) +lex_step(uc_lexer *lex, FILE *fp) { uint32_t masks[] = { 0, le32toh(0x000000ff), le32toh(0x0000ffff), le32toh(0x00ffffff), le32toh(0xffffffff) }; union { uint32_t n; char str[4]; } search; @@ -1110,7 +1112,7 @@ lex_step(uc_lexer *lex, FILE *fp, bool no_regexp) case UT_LEX_PARSE_TOKEN: tok = lex->tok; - rv = tok->parse(lex, no_regexp); + rv = tok->parse(lex); if (rv) { memset(lex->esc, 0, sizeof(lex->esc)); @@ -1175,15 +1177,22 @@ uc_lexer_free(uc_lexer *lex) } uc_token * -uc_lexer_next_token(uc_lexer *lex, bool no_regexp) +uc_lexer_next_token(uc_lexer *lex) { - uc_token *rv; + uc_token *rv = NULL; while (lex->state != UT_LEX_EOF) { - rv = lex_step(lex, lex->source->fp, no_regexp); + rv = lex_step(lex, lex->source->fp); if (rv != NULL) - return rv; + break; + } + + if (rv) { + lex->no_keyword = false; + lex->no_regexp = false; + + return rv; } return emit_op(lex, lex->source->off, TK_EOF, NULL); @@ -132,6 +132,8 @@ typedef struct { uc_source *source; uint8_t eof:1; uint8_t is_escape:1; + uint8_t no_regexp:1; + uint8_t no_keyword:1; size_t buflen; char *buf, *bufstart, *bufend; size_t lookbehindlen; @@ -160,7 +162,7 @@ typedef struct { void uc_lexer_init(uc_lexer *lex, uc_parse_config *config, uc_source *source); void uc_lexer_free(uc_lexer *lex); -uc_token *uc_lexer_next_token(uc_lexer *lex, bool no_regexp); +uc_token *uc_lexer_next_token(uc_lexer *lex); bool utf8enc(char **out, int *rem, int code); |