diff options
author | Jo-Philipp Wich <jo@mein.io> | 2021-12-13 12:58:18 +0100 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2022-01-04 15:53:36 +0100 |
commit | b605dbfcf04f310e08634b52507da7a4155bfce1 (patch) | |
tree | 04397dab9be96a5978e08366299671a8aa507267 /lexer.c | |
parent | 8907ce41a36f8d42097d884550fb3cfbba62e6c5 (diff) |
treewide: rework numeric value handling
- Parse integer literals as unsigned numeric values in order to be able
to represent the entire unsigned 64-bit value range
- Stop parsing minus-prefixed integer literals as negative numbers but
treat them as a separate minus operator followed by a positive integer
instead
- Only store unsigned numeric constants in bytecode
- Rework numeric comparison logic to be able to handle the full range of
64-bit unsigned integers
- If possible, yield unsigned 64-bit results for additions
- Simplify numeric value conversion API
- Compile code with -fwrapv for defined signed overflow semantics
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
Diffstat (limited to 'lexer.c')
-rw-r--r-- | lexer.c | 40 |
1 file changed, 13 insertions, 27 deletions
@@ -121,7 +121,7 @@ static const struct token tokens[] = { { TK_RPAREN, { .pat = ")" }, 1, NULL }, { TK_QMARK, { .pat = "?" }, 1, NULL }, { TK_SCOL, { .pat = ";" }, 1, NULL }, - //{ TK_SUB, { .pat = "-" }, 1, NULL }, + { TK_SUB, { .pat = "-" }, 1, NULL }, { TK_DOT, { .pat = "." }, 1, NULL }, { TK_STRING, { .pat = "'" }, 1, parse_string }, { TK_STRING, { .pat = "\"" }, 1, parse_string }, @@ -129,7 +129,6 @@ static const struct token tokens[] = { { TK_LABEL, { .pat = "_" }, 1, parse_label }, { TK_LABEL, { .pat = "az" }, 0, parse_label }, { TK_LABEL, { .pat = "AZ" }, 0, parse_label }, - { TK_NUMBER, { .pat = "-" }, 1, parse_number }, { TK_NUMBER, { .pat = "09" }, 0, parse_number }, }; @@ -779,39 +778,26 @@ is_numeric_char(uc_lexer_t *lex, char c) static uc_token_t * parse_number(uc_lexer_t *lex) { - const struct token *tok = lex->tok; uc_token_t *rv = NULL; - long long int n; - char *ptr, *e; - double d; + uc_value_t *nv = NULL; + const char *ptr; + char *e; if (!buf_remaining(lex) || !is_numeric_char(lex, lex->bufstart[0])) { - if (lex->lookbehindlen == 0 && !is_numeric_char(lex, lex->bufstart[0])) - return emit_op(lex, lex->source->off, TK_SUB, NULL); - lookbehind_append(lex, "\0", 1); - n = strtoll(lex->lookbehind, &e, 0); - - if (*e == '.' || *e == 'e' || *e == 'E') { - d = strtod(lex->lookbehind, &e); + nv = uc_number_parse(lex->lookbehind, &e); - if (tok->u.pat[0] == '-') - d = -d; + switch (ucv_type(nv)) { + case UC_DOUBLE: + rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_DOUBLE, nv); + break; - if (e > lex->lookbehind && *e == 0) - rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_DOUBLE, ucv_double_new(d)); - else - rv = emit_op(lex, lex->source->off - (lex->lookbehindlen - (e - lex->lookbehind) - 1), TK_ERROR, ucv_string_new("Invalid number literal")); - } - else if (*e == 0) { - if (tok->u.pat[0] == '-') - n = (errno == ERANGE) ? 
INT64_MIN : -n; + case UC_INTEGER: + rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_NUMBER, nv); + break; - rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_NUMBER, ucv_int64_new(n)); - //OP(rv)->is_overflow = (errno == ERANGE); - } - else { + default: rv = emit_op(lex, lex->source->off - (lex->lookbehindlen - (e - lex->lookbehind) - 1), TK_ERROR, ucv_string_new("Invalid number literal")); } |