treewide: rework numeric value handling

- Parse integer literals as unsigned numeric values in order to be able to
  represent the entire unsigned 64-bit value range
- Stop parsing minus-prefixed integer literals as negative numbers but treat
  them as a separate minus operator followed by a positive integer instead
- Only store unsigned numeric constants in bytecode
- Rework numeric comparison logic to be able to handle full 64-bit unsigned
  integers
- If possible, yield unsigned 64-bit results for additions
- Simplify numeric value conversion API
- Compile code with -fwrapv for defined signed overflow semantics

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
author: Jo-Philipp Wich <jo@mein.io> 2021-12-13 12:58:18 +0100
committer: Jo-Philipp Wich <jo@mein.io> 2022-01-04 15:53:36 +0100
commit: b605dbfcf04f310e08634b52507da7a4155bfce1 (patch)
tree: 04397dab9be96a5978e08366299671a8aa507267 /lexer.c
parent: 8907ce41a36f8d42097d884550fb3cfbba62e6c5 (diff)
1 files changed, 13 insertions, 27 deletions
diff --git a/lexer.c b/lexer.c
index c5314ae..7637306 100644
--- a/lexer.c
+++ b/lexer.c
@@ -121,7 +121,7 @@ static const struct token tokens[] = {
 	{ TK_RPAREN,	{ .pat = ")" },     1, NULL },
 	{ TK_QMARK,		{ .pat = "?" },     1, NULL },
 	{ TK_SCOL,		{ .pat = ";" },     1, NULL },
-	//{ TK_SUB,		{ .pat = "-" },     1, NULL },
+	{ TK_SUB,		{ .pat = "-" },     1, NULL },
 	{ TK_DOT,		{ .pat = "." },     1, NULL },
 	{ TK_STRING,	{ .pat = "'" },     1, parse_string },
 	{ TK_STRING,	{ .pat = "\"" },    1, parse_string },
@@ -129,7 +129,6 @@ static const struct token tokens[] = {
 	{ TK_LABEL,		{ .pat = "_" },     1, parse_label },
 	{ TK_LABEL,		{ .pat = "az" },    0, parse_label },
 	{ TK_LABEL,		{ .pat = "AZ" },    0, parse_label },
-	{ TK_NUMBER,	{ .pat = "-" },     1, parse_number },
 	{ TK_NUMBER,	{ .pat = "09" },    0, parse_number },
 };
 
@@ -779,39 +778,26 @@ is_numeric_char(uc_lexer_t *lex, char c)
 static uc_token_t *
 parse_number(uc_lexer_t *lex)
 {
-	const struct token *tok = lex->tok;
 	uc_token_t *rv = NULL;
-	long long int n;
-	char *ptr, *e;
-	double d;
+	uc_value_t *nv = NULL;
+	const char *ptr;
+	char *e;
 
 	if (!buf_remaining(lex) || !is_numeric_char(lex, lex->bufstart[0])) {
-		if (lex->lookbehindlen == 0 && !is_numeric_char(lex, lex->bufstart[0]))
-			return emit_op(lex, lex->source->off, TK_SUB, NULL);
-
 		lookbehind_append(lex, "\0", 1);
 
-		n = strtoll(lex->lookbehind, &e, 0);
-
-		if (*e == '.' || *e == 'e' || *e == 'E') {
-			d = strtod(lex->lookbehind, &e);
+		nv = uc_number_parse(lex->lookbehind, &e);
 
-			if (tok->u.pat[0] == '-')
-				d = -d;
+		switch (ucv_type(nv)) {
+		case UC_DOUBLE:
+			rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_DOUBLE, nv);
+			break;
 
-			if (e > lex->lookbehind && *e == 0)
-				rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_DOUBLE, ucv_double_new(d));
-			else
-				rv = emit_op(lex, lex->source->off - (lex->lookbehindlen - (e - lex->lookbehind) - 1), TK_ERROR, ucv_string_new("Invalid number literal"));
-		}
-		else if (*e == 0) {
-			if (tok->u.pat[0] == '-')
-				n = (errno == ERANGE) ? INT64_MIN : -n;
+		case UC_INTEGER:
+			rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_NUMBER, nv);
+			break;
 
-			rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_NUMBER, ucv_int64_new(n));
-			//OP(rv)->is_overflow = (errno == ERANGE);
-		}
-		else {
+		default:
 			rv = emit_op(lex, lex->source->off - (lex->lookbehindlen - (e - lex->lookbehind) - 1), TK_ERROR, ucv_string_new("Invalid number literal"));
 		}
author	Jo-Philipp Wich <jo@mein.io>	2021-12-13 12:58:18 +0100
committer	Jo-Philipp Wich <jo@mein.io>	2022-01-04 15:53:36 +0100
commit	b605dbfcf04f310e08634b52507da7a4155bfce1 (patch)
tree	04397dab9be96a5978e08366299671a8aa507267 /lexer.c
parent	8907ce41a36f8d42097d884550fb3cfbba62e6c5 (diff)