diff options
Diffstat (limited to 'lexer.c')
-rw-r--r-- | lexer.c | 56 |
1 files changed, 41 insertions, 15 deletions
@@ -145,17 +145,21 @@ emit_op(uc_lexer_t *lex, ssize_t pos, int type, uc_value_t *uv) else lex->curr.pos = (size_t)pos; + lex->curr.end = lex->source->off; + return &lex->curr; } static uc_token_t * emit_buffer(uc_lexer_t *lex, ssize_t pos, int type, const char *strip_trailing_chars) { uc_token_t *rv = NULL; + char *p; if (lex->buffer.count) { if (strip_trailing_chars) - while (lex->buffer.count > 0 && strchr(strip_trailing_chars, *uc_vector_last(&lex->buffer))) - lex->buffer.count--; + for (p = uc_vector_last(&lex->buffer); + p && strchr(strip_trailing_chars, *p); + lex->buffer.count--, p = uc_vector_last(&lex->buffer)); rv = emit_op(lex, pos, type, ucv_string_new_length(uc_vector_first(&lex->buffer), lex->buffer.count)); @@ -172,16 +176,23 @@ emit_buffer(uc_lexer_t *lex, ssize_t pos, int type, const char *strip_trailing_c static uc_token_t * parse_comment(uc_lexer_t *lex, int kind) { + size_t off = lex->source->off - 1; int ch; + uc_vector_push(&lex->buffer, '/'); + while (true) { ch = next_char(lex); + uc_vector_push(&lex->buffer, ch); + if (kind == '/' && (ch == '\n' || ch == EOF)) break; - if (kind == '*' && ch == '*' && check_char(lex, '/')) + if (kind == '*' && ch == '*' && check_char(lex, '/')) { + uc_vector_push(&lex->buffer, '/'); break; + } if (ch == EOF) { lex->state = UC_LEX_EOF; @@ -190,7 +201,7 @@ parse_comment(uc_lexer_t *lex, int kind) } } - return NULL; + return emit_buffer(lex, off, TK_COMMENT, NULL); } static void @@ -338,7 +349,7 @@ parse_escape(uc_lexer_t *lex, const char *regex_macros) static uc_token_t * parse_string(uc_lexer_t *lex, int kind) { - uc_token_t *err; + uc_token_t *err, *tok; unsigned type; int code, ch; size_t off; @@ -359,7 +370,10 @@ parse_string(uc_lexer_t *lex, int kind) if (type == TK_TEMPLATE && check_char(lex, '{')) { lex->state = UC_LEX_PLACEHOLDER_START; - return emit_buffer(lex, off, type, NULL); + tok = emit_buffer(lex, off, type, NULL); + tok->end -= 2; + + return tok; } uc_vector_push(&lex->buffer, '$'); @@ -952,8 +966,7 @@ lex_step(uc_lexer_t *lex) /* found start of statement block */ case '%': - lex->state = UC_LEX_IDENTIFY_TOKEN; - lex->block = STATEMENTS; + lex->state = UC_LEX_BLOCK_STATEMENT_EMIT_TAG; if (check_char(lex, '-')) strip = " \n\t\v\f\r"; @@ -987,6 +1000,8 @@ lex_step(uc_lexer_t *lex) if (!tok) continue; + tok->end -= 2; + return tok; @@ -1012,18 +1027,24 @@ lex_step(uc_lexer_t *lex) return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated template block")); } + tok = emit_op(lex, lex->lastoff, TK_COMMENT, NULL); + lex->lastoff = lex->source->off; lex->state = UC_LEX_IDENTIFY_BLOCK; - continue; - + return tok; case UC_LEX_BLOCK_EXPRESSION_EMIT_TAG: lex->state = UC_LEX_IDENTIFY_TOKEN; lex->block = EXPRESSION; - return emit_op(lex, lex->source->off, TK_LEXP, NULL); + return emit_op(lex, lex->source->off - 2, TK_LEXP, NULL); + case UC_LEX_BLOCK_STATEMENT_EMIT_TAG: + lex->state = UC_LEX_IDENTIFY_TOKEN; + lex->block = STATEMENTS; + + return emit_op(lex, lex->source->off - 2, TK_LSTM, NULL); case UC_LEX_IDENTIFY_TOKEN: do { tok = lex_find_token(lex); } while (tok == NULL); @@ -1042,7 +1063,7 @@ lex_step(uc_lexer_t *lex) lex->state = UC_LEX_IDENTIFY_BLOCK; lex->block = NONE; - tok = emit_op(lex, -2, TK_SCOL, NULL); + tok = emit_op(lex, -2, TK_RSTM, NULL); } /* found end of expression block */ @@ -1092,7 +1113,10 @@ lex_step(uc_lexer_t *lex) case UC_LEX_PLACEHOLDER_END: lex->state = UC_LEX_IDENTIFY_TOKEN; - return parse_string(lex, '`'); + tok = parse_string(lex, '`'); + tok->pos++; + + return tok; case UC_LEX_EOF: @@ -1152,8 +1176,10 @@ uc_lexer_next_token(uc_lexer_t *lex) rv = lex_step(lex); - lex->no_keyword = false; - lex->no_regexp = false; + if (rv && rv->type != TK_COMMENT) { + lex->no_keyword = false; + lex->no_regexp = false; + } return rv; } |