summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Jo-Philipp Wich <jo@mein.io> 2022-06-01 12:36:44 +0200
committer Jo-Philipp Wich <jo@mein.io> 2022-06-01 12:53:26 +0200
commit d99604749d658f5f344d53e77dd52fbb0f6d176c (patch)
tree 2e60555dc4ce9e81b42b7556dd2e2491473f776c
parent 9efbe183d7805eb60652a3745ec48cd32682ef8d (diff)
syntax: adjust number literal parsing and string to number conversion
- Recognize new number literal prefixes `0o` and `0O` for octal as well as
  `0b` and `0B` for binary number literals
- Treat number literals with leading zeros as octal while parsing but as
  decimal ones on implicit number conversions, meaning that `012` will
  yield `10` while `+"012"` or `"012" + 0` will yield `12`

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- include/ucode/vallist.h 1
-rw-r--r-- lexer.c 34
-rw-r--r-- tests/custom/00_syntax/10_numeric_literals 4
-rw-r--r-- tests/custom/03_stdlib/08_int 17
-rw-r--r-- vallist.c 53
5 files changed, 98 insertions, 11 deletions
diff --git a/include/ucode/vallist.h b/include/ucode/vallist.h
index fb46677..3dc5720 100644
--- a/include/ucode/vallist.h
+++ b/include/ucode/vallist.h
@@ -34,6 +34,7 @@ typedef enum {
} uc_value_type_t;
uc_value_t *uc_number_parse(const char *buf, char **end);
+uc_value_t *uc_number_parse_octal(const char *buf, char **end);
bool uc_double_pack(double d, char *buf, bool little_endian);
double uc_double_unpack(const char *buf, bool little_endian);
diff --git a/lexer.c b/lexer.c
index 9ccc3ae..aae3e2f 100644
--- a/lexer.c
+++ b/lexer.c
@@ -696,10 +696,38 @@ is_numeric_char(uc_lexer_t *lex, char c)
{
char prev = lex->lookbehindlen ? lex->lookbehind[lex->lookbehindlen-1] : 0;
- if ((prev == 'e' || prev == 'E') && (c == '-' || c == '+'))
+ switch (c|32) {
+ case '.':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
return true;
- return prev ? (isxdigit(c) || c == 'x' || c == 'X' || c == '.') : (isdigit(c) || c == '.');
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'o':
+ case 'x':
+ /* require previous char, a number literal cannot start with these */
+ return prev != 0;
+
+ case '+':
+ case '-':
+ /* sign is only allowed after an exponent char */
+ return (prev|32) == 'e';
+ }
+
+ return false;
}
static uc_token_t *
@@ -713,7 +741,7 @@ parse_number(uc_lexer_t *lex)
if (!buf_remaining(lex) || !is_numeric_char(lex, lex->bufstart[0])) {
lookbehind_append(lex, "\0", 1);
- nv = uc_number_parse(lex->lookbehind, &e);
+ nv = uc_number_parse_octal(lex->lookbehind, &e);
switch (ucv_type(nv)) {
case UC_DOUBLE:
diff --git a/tests/custom/00_syntax/10_numeric_literals b/tests/custom/00_syntax/10_numeric_literals
index 7aa49b6..a19bbd5 100644
--- a/tests/custom/00_syntax/10_numeric_literals
+++ b/tests/custom/00_syntax/10_numeric_literals
@@ -8,6 +8,8 @@ doubles internally.
-- Expect stdout --
Integers literals: 123, 127, 2748, 57082
Float literals: 10, 10.3, 1.23456e-65, 16.0625
+Octal literals: 63, 118
+Binary literals: 7, 11
Special values: Infinity, Infinity, NaN, NaN
Minimum values: -9223372036854775808, -1.79769e+308
Maximum values: 9223372036854775807, 1.79769e+308
@@ -18,6 +20,8 @@ Maximum truncation: 18446744073709551615, Infinity
-- Testcase --
Integers literals: {{ 123 }}, {{ 0177 }}, {{ 0xabc }}, {{ 0xDEFA }}
Float literals: {{ 10. }}, {{ 10.3 }}, {{ 123.456e-67 }}, {{ 0x10.1 }}
+Octal literals: {{ 0o77 }}, {{ 0O166 }}
+Binary literals: {{ 0b111 }}, {{ 0B1011 }}
Special values: {{ Infinity }}, {{ 1 / 0 }}, {{ NaN }}, {{ "x" / 1 }}
Minimum values: {{ -9223372036854775808 }}, {{ -1.7976931348623158e+308 }}
Maximum values: {{ 9223372036854775807 }}, {{ 1.7976931348623158e+308 }}
diff --git a/tests/custom/03_stdlib/08_int b/tests/custom/03_stdlib/08_int
index 12db299..eae4904 100644
--- a/tests/custom/03_stdlib/08_int
+++ b/tests/custom/03_stdlib/08_int
@@ -1,8 +1,9 @@
The `int()` function converts the given value into a signed integer
-value and returns the resulting number.
+value and returns the resulting number. In case the value is of type
+string, a second optional base argument may be specified which is
+passed to the underlying strtoll(3) implementation.
Returns `NaN` if the given argument is not convertible into a number.
-
Returns `NaN` if the conversion result is out of range.
-- Testcase --
@@ -19,7 +20,11 @@ Returns `NaN` if the conversion result is out of range.
int("0xffffffffffffffff"),
int("0177"),
int("+145"),
- int("-96")
+ int("-96"),
+ int("0177", 8),
+ int("0x1000", 16),
+ int("1111", 2),
+ int("0xffffffffffffffff", 16)
]);
%}
-- End --
@@ -37,6 +42,10 @@ Returns `NaN` if the conversion result is out of range.
0,
177,
145,
- -96
+ -96,
+ 127,
+ 4096,
+ 15,
+ "NaN"
]
-- End --
diff --git a/vallist.c b/vallist.c
index b42f564..44709bf 100644
--- a/vallist.c
+++ b/vallist.c
@@ -45,12 +45,13 @@
#define UC_VALLIST_CHUNK_SIZE 8
-uc_value_t *
-uc_number_parse(const char *buf, char **end)
+static uc_value_t *
+uc_number_parse_common(const char *buf, bool octal, char **end)
{
unsigned long long u;
const char *p = buf;
bool neg = false;
+ int base = 10;
double d;
char *e;
@@ -61,6 +62,9 @@ uc_number_parse(const char *buf, char **end)
neg = true;
p++;
}
+ else if (*p == '+') {
+ p++;
+ }
if (*p != 0 && !isxdigit(*p))
return NULL;
@@ -68,9 +72,38 @@ uc_number_parse(const char *buf, char **end)
if (!end)
end = &e;
- u = strtoull(p, end, 0);
+ if (p[0] == '0') {
+ switch (p[1]|32) {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ base = octal ? 8 : 10;
+ break;
+
+ case 'x':
+ base = 16;
+ break;
+
+ case 'b':
+ base = 2;
+ p += 2;
+ break;
+
+ case 'o':
+ base = 8;
+ p += 2;
+ break;
+ }
+ }
- if (**end == '.' || **end == 'e' || **end == 'E') {
+ u = strtoull(p, end, base);
+
+ if (base >= 10 && (**end == '.' || (**end|32) == 'e')) {
d = strtod(p, end);
if (!isspace(**end) && **end != 0)
@@ -95,6 +128,18 @@ uc_number_parse(const char *buf, char **end)
return ucv_uint64_new(u);
}
+uc_value_t *
+uc_number_parse(const char *buf, char **end)
+{
+ return uc_number_parse_common(buf, false, end);
+}
+
+uc_value_t *
+uc_number_parse_octal(const char *buf, char **end)
+{
+ return uc_number_parse_common(buf, true, end);
+}
+
bool
uc_double_pack(double d, char *buf, bool little_endian)
{