diff options
author | Jo-Philipp Wich <jo@mein.io> | 2022-06-01 12:36:44 +0200 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2022-06-01 12:53:26 +0200 |
commit | d99604749d658f5f344d53e77dd52fbb0f6d176c (patch) | |
tree | 2e60555dc4ce9e81b42b7556dd2e2491473f776c | |
parent | 9efbe183d7805eb60652a3745ec48cd32682ef8d (diff) |
syntax: adjust number literal parsing and string to number conversion
- Recognize new number literal prefixes `0o` and `0O` for octal as well
as `0b` and `0B` for binary number literals
- Treat number literals with leading zeros as octal while parsing but
as decimal on implicit number conversions, meaning that `012` will yield
`10` while `+"012"` or `"012" + 0` will yield `12`
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r-- | include/ucode/vallist.h | 1 | ||||
-rw-r--r-- | lexer.c | 34 | ||||
-rw-r--r-- | tests/custom/00_syntax/10_numeric_literals | 4 | ||||
-rw-r--r-- | tests/custom/03_stdlib/08_int | 17 | ||||
-rw-r--r-- | vallist.c | 53 |
5 files changed, 98 insertions, 11 deletions
diff --git a/include/ucode/vallist.h b/include/ucode/vallist.h index fb46677..3dc5720 100644 --- a/include/ucode/vallist.h +++ b/include/ucode/vallist.h @@ -34,6 +34,7 @@ typedef enum { } uc_value_type_t; uc_value_t *uc_number_parse(const char *buf, char **end); +uc_value_t *uc_number_parse_octal(const char *buf, char **end); bool uc_double_pack(double d, char *buf, bool little_endian); double uc_double_unpack(const char *buf, bool little_endian); @@ -696,10 +696,38 @@ is_numeric_char(uc_lexer_t *lex, char c) { char prev = lex->lookbehindlen ? lex->lookbehind[lex->lookbehindlen-1] : 0; - if ((prev == 'e' || prev == 'E') && (c == '-' || c == '+')) + switch (c|32) { + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': return true; - return prev ? (isxdigit(c) || c == 'x' || c == 'X' || c == '.') : (isdigit(c) || c == '.'); + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'o': + case 'x': + /* require previous char, a number literal cannot start with these */ + return prev != 0; + + case '+': + case '-': + /* sign is only allowed after an exponent char */ + return (prev|32) == 'e'; + } + + return false; } static uc_token_t * @@ -713,7 +741,7 @@ parse_number(uc_lexer_t *lex) if (!buf_remaining(lex) || !is_numeric_char(lex, lex->bufstart[0])) { lookbehind_append(lex, "\0", 1); - nv = uc_number_parse(lex->lookbehind, &e); + nv = uc_number_parse_octal(lex->lookbehind, &e); switch (ucv_type(nv)) { case UC_DOUBLE: diff --git a/tests/custom/00_syntax/10_numeric_literals b/tests/custom/00_syntax/10_numeric_literals index 7aa49b6..a19bbd5 100644 --- a/tests/custom/00_syntax/10_numeric_literals +++ b/tests/custom/00_syntax/10_numeric_literals @@ -8,6 +8,8 @@ doubles internally. 
-- Expect stdout -- Integers literals: 123, 127, 2748, 57082 Float literals: 10, 10.3, 1.23456e-65, 16.0625 +Octal literals: 63, 118 +Binary literals: 7, 11 Special values: Infinity, Infinity, NaN, NaN Minimum values: -9223372036854775808, -1.79769e+308 Maximum values: 9223372036854775807, 1.79769e+308 @@ -18,6 +20,8 @@ Maximum truncation: 18446744073709551615, Infinity -- Testcase -- Integers literals: {{ 123 }}, {{ 0177 }}, {{ 0xabc }}, {{ 0xDEFA }} Float literals: {{ 10. }}, {{ 10.3 }}, {{ 123.456e-67 }}, {{ 0x10.1 }} +Octal literals: {{ 0o77 }}, {{ 0O166 }} +Binary literals: {{ 0b111 }}, {{ 0B1011 }} Special values: {{ Infinity }}, {{ 1 / 0 }}, {{ NaN }}, {{ "x" / 1 }} Minimum values: {{ -9223372036854775808 }}, {{ -1.7976931348623158e+308 }} Maximum values: {{ 9223372036854775807 }}, {{ 1.7976931348623158e+308 }} diff --git a/tests/custom/03_stdlib/08_int b/tests/custom/03_stdlib/08_int index 12db299..eae4904 100644 --- a/tests/custom/03_stdlib/08_int +++ b/tests/custom/03_stdlib/08_int @@ -1,8 +1,9 @@ The `int()` function converts the given value into a signed integer -value and returns the resulting number. +value and returns the resulting number. In case the value is of type +string, a second optional base argument may be specified which is +passed to the underlying strtoll(3) implementation. Returns `NaN` if the given argument is not convertible into a number. - Returns `NaN` if the conversion result is out of range. -- Testcase -- @@ -19,7 +20,11 @@ Returns `NaN` if the conversion result is out of range. int("0xffffffffffffffff"), int("0177"), int("+145"), - int("-96") + int("-96"), + int("0177", 8), + int("0x1000", 16), + int("1111", 2), + int("0xffffffffffffffff", 16) ]); %} -- End -- @@ -37,6 +42,10 @@ Returns `NaN` if the conversion result is out of range. 
0, 177, 145, - -96 + -96, + 127, + 4096, + 15, + "NaN" ] -- End -- @@ -45,12 +45,13 @@ #define UC_VALLIST_CHUNK_SIZE 8 -uc_value_t * -uc_number_parse(const char *buf, char **end) +static uc_value_t * +uc_number_parse_common(const char *buf, bool octal, char **end) { unsigned long long u; const char *p = buf; bool neg = false; + int base = 10; double d; char *e; @@ -61,6 +62,9 @@ uc_number_parse(const char *buf, char **end) neg = true; p++; } + else if (*p == '+') { + p++; + } if (*p != 0 && !isxdigit(*p)) return NULL; @@ -68,9 +72,38 @@ uc_number_parse(const char *buf, char **end) if (!end) end = &e; - u = strtoull(p, end, 0); + if (p[0] == '0') { + switch (p[1]|32) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + base = octal ? 8 : 10; + break; + + case 'x': + base = 16; + break; + + case 'b': + base = 2; + p += 2; + break; + + case 'o': + base = 8; + p += 2; + break; + } + } - if (**end == '.' || **end == 'e' || **end == 'E') { + u = strtoull(p, end, base); + + if (base >= 10 && (**end == '.' || (**end|32) == 'e')) { d = strtod(p, end); if (!isspace(**end) && **end != 0) @@ -95,6 +128,18 @@ uc_number_parse(const char *buf, char **end) return ucv_uint64_new(u); } +uc_value_t * +uc_number_parse(const char *buf, char **end) +{ + return uc_number_parse_common(buf, false, end); +} + +uc_value_t * +uc_number_parse_octal(const char *buf, char **end) +{ + return uc_number_parse_common(buf, true, end); +} + bool uc_double_pack(double d, char *buf, bool little_endian) { |