summaryrefslogtreecommitdiffhomepage
path: root/lexer.c
diff options
context:
space:
mode:
author Jo-Philipp Wich <jo@mein.io> 2021-12-13 12:58:18 +0100
committer Jo-Philipp Wich <jo@mein.io> 2022-01-04 15:53:36 +0100
commit b605dbfcf04f310e08634b52507da7a4155bfce1 (patch)
tree 04397dab9be96a5978e08366299671a8aa507267 /lexer.c
parent 8907ce41a36f8d42097d884550fb3cfbba62e6c5 (diff)
treewide: rework numeric value handling
- Parse integer literals as unsigned numeric values in order to be able to represent the entire unsigned 64-bit value range
- Stop parsing minus-prefixed integer literals as negative numbers but treat them as a separate minus operator followed by a positive integer instead
- Only store unsigned numeric constants in bytecode
- Rework numeric comparison logic to be able to handle full 64-bit unsigned integers
- If possible, yield unsigned 64-bit results for additions
- Simplify numeric value conversion API
- Compile code with -fwrapv for defined signed overflow semantics

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
Diffstat (limited to 'lexer.c')
-rw-r--r--lexer.c40
1 files changed, 13 insertions, 27 deletions
diff --git a/lexer.c b/lexer.c
index c5314ae..7637306 100644
--- a/lexer.c
+++ b/lexer.c
@@ -121,7 +121,7 @@ static const struct token tokens[] = {
{ TK_RPAREN, { .pat = ")" }, 1, NULL },
{ TK_QMARK, { .pat = "?" }, 1, NULL },
{ TK_SCOL, { .pat = ";" }, 1, NULL },
- //{ TK_SUB, { .pat = "-" }, 1, NULL },
+ { TK_SUB, { .pat = "-" }, 1, NULL },
{ TK_DOT, { .pat = "." }, 1, NULL },
{ TK_STRING, { .pat = "'" }, 1, parse_string },
{ TK_STRING, { .pat = "\"" }, 1, parse_string },
@@ -129,7 +129,6 @@ static const struct token tokens[] = {
{ TK_LABEL, { .pat = "_" }, 1, parse_label },
{ TK_LABEL, { .pat = "az" }, 0, parse_label },
{ TK_LABEL, { .pat = "AZ" }, 0, parse_label },
- { TK_NUMBER, { .pat = "-" }, 1, parse_number },
{ TK_NUMBER, { .pat = "09" }, 0, parse_number },
};
@@ -779,39 +778,26 @@ is_numeric_char(uc_lexer_t *lex, char c)
static uc_token_t *
parse_number(uc_lexer_t *lex)
{
- const struct token *tok = lex->tok;
uc_token_t *rv = NULL;
- long long int n;
- char *ptr, *e;
- double d;
+ uc_value_t *nv = NULL;
+ const char *ptr;
+ char *e;
if (!buf_remaining(lex) || !is_numeric_char(lex, lex->bufstart[0])) {
- if (lex->lookbehindlen == 0 && !is_numeric_char(lex, lex->bufstart[0]))
- return emit_op(lex, lex->source->off, TK_SUB, NULL);
-
lookbehind_append(lex, "\0", 1);
- n = strtoll(lex->lookbehind, &e, 0);
-
- if (*e == '.' || *e == 'e' || *e == 'E') {
- d = strtod(lex->lookbehind, &e);
+ nv = uc_number_parse(lex->lookbehind, &e);
- if (tok->u.pat[0] == '-')
- d = -d;
+ switch (ucv_type(nv)) {
+ case UC_DOUBLE:
+ rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_DOUBLE, nv);
+ break;
- if (e > lex->lookbehind && *e == 0)
- rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_DOUBLE, ucv_double_new(d));
- else
- rv = emit_op(lex, lex->source->off - (lex->lookbehindlen - (e - lex->lookbehind) - 1), TK_ERROR, ucv_string_new("Invalid number literal"));
- }
- else if (*e == 0) {
- if (tok->u.pat[0] == '-')
- n = (errno == ERANGE) ? INT64_MIN : -n;
+ case UC_INTEGER:
+ rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_NUMBER, nv);
+ break;
- rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_NUMBER, ucv_int64_new(n));
- //OP(rv)->is_overflow = (errno == ERANGE);
- }
- else {
+ default:
rv = emit_op(lex, lex->source->off - (lex->lookbehindlen - (e - lex->lookbehind) - 1), TK_ERROR, ucv_string_new("Invalid number literal"));
}