summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- compiler.c 110
-rw-r--r-- lexer.c 79
-rw-r--r-- lexer.h 4
3 files changed, 97 insertions, 96 deletions
diff --git a/compiler.c b/compiler.c
index fd5b3af..c5cfef5 100644
--- a/compiler.c
+++ b/compiler.c
@@ -195,64 +195,11 @@ uc_compiler_set_srcpos(uc_compiler *compiler, size_t srcpos)
static void
uc_compiler_parse_advance(uc_compiler *compiler)
{
- bool no_regexp;
-
ucv_put(compiler->parser->prev.uv);
compiler->parser->prev = compiler->parser->curr;
while (true) {
- /* Follow JSLint logic and treat a slash after any of the
- * `(,=:[!&|?{};` characters as the beginning of a regex
- * literal... */
- switch (compiler->parser->prev.type) {
- case TK_LPAREN:
- case TK_COMMA:
-
- case TK_ASADD:
- case TK_ASBAND:
- case TK_ASBOR:
- case TK_ASBXOR:
- case TK_ASDIV:
- case TK_ASLEFT:
- case TK_ASMOD:
- case TK_ASMUL:
- case TK_ASRIGHT:
- case TK_ASSIGN:
- case TK_ASSUB:
- case TK_EQ:
- case TK_EQS:
- case TK_GE:
- case TK_LE:
- case TK_NE:
- case TK_NES:
-
- case TK_COLON:
- case TK_LBRACK:
- case TK_NOT:
-
- case TK_AND:
- case TK_BAND:
-
- case TK_OR:
- case TK_BOR:
-
- case TK_QMARK:
-
- case TK_LBRACE:
- case TK_RBRACE:
-
- case TK_LSTM:
- case TK_LEXP:
-
- case TK_SCOL:
- no_regexp = false;
- break;
-
- default:
- no_regexp = (compiler->parser->prev.type != 0);
- }
-
- compiler->parser->curr = *uc_lexer_next_token(&compiler->parser->lex, no_regexp);
+ compiler->parser->curr = *uc_lexer_next_token(&compiler->parser->lex);
if (compiler->parser->curr.type != TK_ERROR)
break;
@@ -359,22 +306,41 @@ uc_compiler_parse_precedence(uc_compiler *compiler, uc_precedence_t precedence)
uc_parse_rule *rule;
bool assignable;
- uc_compiler_parse_advance(compiler);
-
- rule = uc_compiler_parse_rule(compiler->parser->prev.type);
+ rule = uc_compiler_parse_rule(compiler->parser->curr.type);
if (!rule->prefix) {
- uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, "Expecting expression");
+ uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, "Expecting expression");
+ uc_compiler_parse_advance(compiler);
return;
}
+ /* allow reserved words as property names in object literals */
+ if (rule->prefix == uc_compiler_compile_object)
+ compiler->parser->lex.no_keyword = true;
+
+ /* unless a sub-expression follows, treat subsequent slash as division
+ * operator and not as beginning of regexp literal */
+ if (rule->prefix != uc_compiler_compile_paren &&
+ rule->prefix != uc_compiler_compile_unary &&
+ rule->prefix != uc_compiler_compile_array)
+ compiler->parser->lex.no_regexp = true;
+
+ uc_compiler_parse_advance(compiler);
+
assignable = (precedence <= P_ASSIGN);
rule->prefix(compiler, assignable);
while (precedence <= uc_compiler_parse_rule(compiler->parser->curr.type)->precedence) {
+ rule = uc_compiler_parse_rule(compiler->parser->curr.type);
+
+ /* allow reserved words in property accessors */
+ if (rule->infix == uc_compiler_compile_dot)
+ compiler->parser->lex.no_keyword = true;
+
uc_compiler_parse_advance(compiler);
- uc_compiler_parse_rule(compiler->parser->prev.type)->infix(compiler, assignable);
+
+ rule->infix(compiler, assignable);
}
if (assignable && uc_compiler_parse_at_assignment_op(compiler))
@@ -1207,7 +1173,14 @@ uc_compiler_compile_paren(uc_compiler *compiler, bool assignable)
continue;
}
else {
- maybe_arrowfn = uc_compiler_parse_match(compiler, TK_RPAREN);
+ maybe_arrowfn = uc_compiler_parse_check(compiler, TK_RPAREN);
+
+ if (maybe_arrowfn) {
+ /* A subsequent slash cannot be a regular expression literal */
+ compiler->parser->lex.no_regexp = true;
+ uc_compiler_parse_advance(compiler);
+ }
+
break;
}
}
@@ -1276,6 +1249,9 @@ uc_compiler_compile_paren(uc_compiler *compiler, bool assignable)
if (!uc_compiler_parse_check(compiler, TK_RPAREN))
uc_compiler_compile_expression(compiler);
+ /* A subsequent slash cannot be a regular expression literal */
+ compiler->parser->lex.no_regexp = true;
+
/* At this point we expect the end of the parenthesized expression, anything
* else is a syntax error */
uc_compiler_parse_consume(compiler, TK_RPAREN);
@@ -1315,6 +1291,8 @@ uc_compiler_compile_call(uc_compiler *compiler, bool assignable)
while (uc_compiler_parse_match(compiler, TK_COMMA));
}
+ /* after a function call expression, no regexp literal can follow */
+ compiler->parser->lex.no_regexp = true;
uc_compiler_parse_consume(compiler, TK_RPAREN);
/* if lhs is a dot or bracket expression, emit a method call */
@@ -1564,6 +1542,9 @@ uc_compiler_compile_or(uc_compiler *compiler, bool assignable)
static void
uc_compiler_compile_dot(uc_compiler *compiler, bool assignable)
{
+ /* no regexp literal possible after property access */
+ compiler->parser->lex.no_regexp = true;
+
/* parse label lhs */
uc_compiler_parse_consume(compiler, TK_LABEL);
uc_compiler_emit_constant(compiler, compiler->parser->prev.pos, compiler->parser->prev.uv);
@@ -1578,6 +1559,9 @@ uc_compiler_compile_subscript(uc_compiler *compiler, bool assignable)
{
/* compile lhs */
uc_compiler_compile_expression(compiler);
+
+ /* no regexp literal possible after computed property access */
+ compiler->parser->lex.no_regexp = true;
uc_compiler_parse_consume(compiler, TK_RBRACK);
/* depending on context, compile into I_UVAL, I_SVAL or I_LVAL operation */
@@ -1653,6 +1637,8 @@ uc_compiler_compile_array(uc_compiler *compiler, bool assignable)
}
while (uc_compiler_parse_match(compiler, TK_COMMA));
+ /* no regexp literal possible after array literal */
+ compiler->parser->lex.no_regexp = true;
uc_compiler_parse_consume(compiler, TK_RBRACK);
/* push items on stack */
@@ -1751,9 +1737,13 @@ uc_compiler_compile_object(uc_compiler *compiler, bool assignable)
hint_count += 2;
len += 2;
+
+ compiler->parser->lex.no_keyword = true;
}
while (uc_compiler_parse_match(compiler, TK_COMMA));
+ /* no regexp literal possible after object literal */
+ compiler->parser->lex.no_regexp = true;
uc_compiler_parse_consume(compiler, TK_RBRACE);
/* set items on stack */
diff --git a/lexer.c b/lexer.c
index aaba314..0fbea79 100644
--- a/lexer.c
+++ b/lexer.c
@@ -48,7 +48,7 @@ struct token {
char pat[4];
} u;
unsigned plen;
- uc_token *(*parse)(uc_lexer *, bool);
+ uc_token *(*parse)(uc_lexer *);
};
#define dec(o) \
@@ -58,11 +58,11 @@ struct token {
(((x) >= 'a') ? (10 + (x) - 'a') : \
(((x) >= 'A') ? (10 + (x) - 'A') : dec(x)))
-static uc_token *parse_comment(uc_lexer *, bool);
-static uc_token *parse_string(uc_lexer *, bool);
-static uc_token *parse_regexp(uc_lexer *, bool);
-static uc_token *parse_number(uc_lexer *, bool);
-static uc_token *parse_label(uc_lexer *, bool);
+static uc_token *parse_comment(uc_lexer *);
+static uc_token *parse_string(uc_lexer *);
+static uc_token *parse_regexp(uc_lexer *);
+static uc_token *parse_number(uc_lexer *);
+static uc_token *parse_label(uc_lexer *);
static const struct token tokens[] = {
{ TK_ASLEFT, { .pat = "<<=" }, 3, NULL },
@@ -353,7 +353,7 @@ buf_consume(uc_lexer *lex, size_t len) {
}
static uc_token *
-parse_comment(uc_lexer *lex, bool no_regexp)
+parse_comment(uc_lexer *lex)
{
const struct token *tok = lex->tok;
const char *ptr, *end;
@@ -397,7 +397,7 @@ append_utf8(uc_lexer *lex, int code) {
}
static uc_token *
-parse_string(uc_lexer *lex, bool no_regexp)
+parse_string(uc_lexer *lex)
{
const struct token *tok = lex->tok;
char q = tok->u.pat[0];
@@ -625,7 +625,7 @@ enum {
};
static uc_token *
-parse_regexp(uc_lexer *lex, bool no_regexp)
+parse_regexp(uc_lexer *lex)
{
bool is_reg_global = false, is_reg_icase = false, is_reg_newline = false;
uc_token *rv;
@@ -634,7 +634,7 @@ parse_regexp(uc_lexer *lex, bool no_regexp)
switch (lex->esc[0]) {
case UT_LEX_PARSE_REGEX_INIT:
- if (no_regexp) {
+ if (lex->no_regexp) {
if (buf_startswith(lex, "=")) {
buf_consume(lex, 1);
@@ -648,7 +648,7 @@ parse_regexp(uc_lexer *lex, bool no_regexp)
break;
case UT_LEX_PARSE_REGEX_PATTERN:
- rv = parse_string(lex, no_regexp);
+ rv = parse_string(lex);
if (rv && rv->type == TK_ERROR)
return rv;
@@ -716,7 +716,7 @@ parse_regexp(uc_lexer *lex, bool no_regexp)
*/
static uc_token *
-parse_label(uc_lexer *lex, bool no_regexp)
+parse_label(uc_lexer *lex)
{
const struct token *tok = lex->tok;
const struct keyword *word;
@@ -728,24 +728,26 @@ parse_label(uc_lexer *lex, bool no_regexp)
lookbehind_append(lex, tok->u.pat, tok->plen);
if (!buf_remaining(lex) || (lex->bufstart[0] != '_' && !isalnum(lex->bufstart[0]))) {
- for (i = 0, word = &reserved_words[0]; i < ARRAY_SIZE(reserved_words); i++, word = &reserved_words[i]) {
- if (lex->lookbehind && lex->lookbehindlen == word->plen && !strncmp(lex->lookbehind, word->pat, word->plen)) {
- lookbehind_reset(lex);
-
- switch (word->type) {
- case TK_DOUBLE:
- rv = emit_op(lex, lex->source->off - word->plen, word->type, ucv_double_new(word->u.d));
- break;
-
- case TK_BOOL:
- rv = emit_op(lex, lex->source->off - word->plen, word->type, ucv_boolean_new(word->u.b));
- break;
+ if (lex->no_keyword == false) {
+ for (i = 0, word = &reserved_words[0]; i < ARRAY_SIZE(reserved_words); i++, word = &reserved_words[i]) {
+ if (lex->lookbehind && lex->lookbehindlen == word->plen && !strncmp(lex->lookbehind, word->pat, word->plen)) {
+ lookbehind_reset(lex);
+
+ switch (word->type) {
+ case TK_DOUBLE:
+ rv = emit_op(lex, lex->source->off - word->plen, word->type, ucv_double_new(word->u.d));
+ break;
+
+ case TK_BOOL:
+ rv = emit_op(lex, lex->source->off - word->plen, word->type, ucv_boolean_new(word->u.b));
+ break;
+
+ default:
+ rv = emit_op(lex, lex->source->off - word->plen, word->type, NULL);
+ }
- default:
- rv = emit_op(lex, lex->source->off - word->plen, word->type, NULL);
+ return rv;
}
-
- return rv;
}
}
@@ -784,7 +786,7 @@ is_numeric_char(uc_lexer *lex, char c)
}
static uc_token *
-parse_number(uc_lexer *lex, bool no_regexp)
+parse_number(uc_lexer *lex)
{
const struct token *tok = lex->tok;
uc_token *rv = NULL;
@@ -837,7 +839,7 @@ parse_number(uc_lexer *lex, bool no_regexp)
}
static uc_token *
-lex_step(uc_lexer *lex, FILE *fp, bool no_regexp)
+lex_step(uc_lexer *lex, FILE *fp)
{
uint32_t masks[] = { 0, le32toh(0x000000ff), le32toh(0x0000ffff), le32toh(0x00ffffff), le32toh(0xffffffff) };
union { uint32_t n; char str[4]; } search;
@@ -1110,7 +1112,7 @@ lex_step(uc_lexer *lex, FILE *fp, bool no_regexp)
case UT_LEX_PARSE_TOKEN:
tok = lex->tok;
- rv = tok->parse(lex, no_regexp);
+ rv = tok->parse(lex);
if (rv) {
memset(lex->esc, 0, sizeof(lex->esc));
@@ -1175,15 +1177,22 @@ uc_lexer_free(uc_lexer *lex)
}
uc_token *
-uc_lexer_next_token(uc_lexer *lex, bool no_regexp)
+uc_lexer_next_token(uc_lexer *lex)
{
- uc_token *rv;
+ uc_token *rv = NULL;
while (lex->state != UT_LEX_EOF) {
- rv = lex_step(lex, lex->source->fp, no_regexp);
+ rv = lex_step(lex, lex->source->fp);
if (rv != NULL)
- return rv;
+ break;
+ }
+
+ if (rv) {
+ lex->no_keyword = false;
+ lex->no_regexp = false;
+
+ return rv;
}
return emit_op(lex, lex->source->off, TK_EOF, NULL);
diff --git a/lexer.h b/lexer.h
index 069e9e0..60f6ce0 100644
--- a/lexer.h
+++ b/lexer.h
@@ -132,6 +132,8 @@ typedef struct {
uc_source *source;
uint8_t eof:1;
uint8_t is_escape:1;
+ uint8_t no_regexp:1;
+ uint8_t no_keyword:1;
size_t buflen;
char *buf, *bufstart, *bufend;
size_t lookbehindlen;
@@ -160,7 +162,7 @@ typedef struct {
void uc_lexer_init(uc_lexer *lex, uc_parse_config *config, uc_source *source);
void uc_lexer_free(uc_lexer *lex);
-uc_token *uc_lexer_next_token(uc_lexer *lex, bool no_regexp);
+uc_token *uc_lexer_next_token(uc_lexer *lex);
bool utf8enc(char **out, int *rem, int code);