diff options
author | Jo-Philipp Wich <jo@mein.io> | 2021-12-13 12:58:18 +0100 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2022-01-04 15:53:36 +0100 |
commit | b605dbfcf04f310e08634b52507da7a4155bfce1 (patch) | |
tree | 04397dab9be96a5978e08366299671a8aa507267 /vm.c | |
parent | 8907ce41a36f8d42097d884550fb3cfbba62e6c5 (diff) |
treewide: rework numeric value handling
- Parse integer literals as unsigned numeric values in order to be able
to represent the entire unsigned 64bit value range
- Stop parsing minus-prefixed integer literals as negative numbers but
treat them as separate minus operator followed by a positive integer
instead
- Only store unsigned numeric constants in bytecode
- Rework numeric comparison logic to be able to handle full 64bit
unsigned integers
- If possible, yield unsigned 64 bit results for additions
- Simplify numeric value conversion API
- Compile code with -fwrapv for defined signed overflow semantics
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
Diffstat (limited to 'vm.c')
-rw-r--r-- | vm.c | 338 |
1 files changed, 267 insertions, 71 deletions
@@ -19,6 +19,8 @@ #include <assert.h> #include <ctype.h> #include <math.h> +#include <errno.h> +#include <limits.h> #include "ucode/vm.h" #include "ucode/compiler.h" @@ -33,9 +35,9 @@ static const char *insn_names[__I_MAX] = { static const int8_t insn_operand_bytes[__I_MAX] = { [I_LOAD] = 4, - [I_LOAD8] = -1, - [I_LOAD16] = -2, - [I_LOAD32] = -4, + [I_LOAD8] = 1, + [I_LOAD16] = 2, + [I_LOAD32] = 4, [I_LREXP] = 4, @@ -237,19 +239,6 @@ uc_vm_decode_insn(uc_vm_t *vm, uc_callframe_t *frame, uc_chunk_t *chunk) case 0: break; - case -1: - vm->arg.s8 = frame->ip[0] - 0x7f; - frame->ip++; - break; - - case -2: - vm->arg.s16 = ( - frame->ip[0] * 0x100 + - frame->ip[1] - ) - 0x7fff; - frame->ip += 2; - break; - case -4: vm->arg.s32 = ( frame->ip[0] * 0x1000000UL + @@ -265,6 +254,14 @@ uc_vm_decode_insn(uc_vm_t *vm, uc_callframe_t *frame, uc_chunk_t *chunk) frame->ip++; break; + case 2: + vm->arg.u16 = ( + frame->ip[0] * 0x100 + + frame->ip[1] + ); + frame->ip += 2; + break; + case 4: vm->arg.u32 = ( frame->ip[0] * 0x1000000UL + @@ -922,15 +919,15 @@ uc_vm_insn_load(uc_vm_t *vm, uc_vm_insn_t insn) break; case I_LOAD8: - uc_vm_stack_push(vm, ucv_int64_new(vm->arg.s8)); + uc_vm_stack_push(vm, ucv_uint64_new(vm->arg.u8)); break; case I_LOAD16: - uc_vm_stack_push(vm, ucv_int64_new(vm->arg.s16)); + uc_vm_stack_push(vm, ucv_uint64_new(vm->arg.u16)); break; case I_LOAD32: - uc_vm_stack_push(vm, ucv_int64_new(vm->arg.s32)); + uc_vm_stack_push(vm, ucv_uint64_new(vm->arg.u32)); break; default: @@ -1244,54 +1241,117 @@ uc_vm_insn_store_local(uc_vm_t *vm, uc_vm_insn_t insn) uc_vm_stack_set(vm, frame->stackframe + vm->arg.u32, val); } +static int64_t +int64(uc_value_t *nv, uint64_t *u64) +{ + int64_t n; + + n = ucv_int64_get(nv); + *u64 = 0; + + if (errno == ERANGE) { + n = INT64_MAX; + *u64 = ucv_uint64_get(nv); + } + + return n; +} + +static uint64_t +abs64(int64_t n) +{ + if (n == INT64_MIN) + return 0x8000000000000000ULL; + + if (n < 0) + return -n; + + return n; +} + + static uc_value_t * uc_vm_value_bitop(uc_vm_t *vm, uc_vm_insn_t operation, uc_value_t *value, uc_value_t *operand) { - uc_value_t *rv = NULL; + uc_value_t *nv1, *nv2, *rv = NULL; + uint64_t u1, u2; int64_t n1, n2; - double d; - if (ucv_cast_number(value, &n1, &d) == UC_DOUBLE) - n1 = isnan(d) ? 0 : (int64_t)d; + nv1 = ucv_to_number(value); + nv2 = ucv_to_number(operand); - if (ucv_cast_number(operand, &n2, &d) == UC_DOUBLE) - n2 = isnan(d) ? 0 : (int64_t)d; + n1 = int64(nv1, &u1); + n2 = int64(nv2, &u2); - switch (operation) { - case I_LSHIFT: - rv = ucv_int64_new(n1 << n2); - break; + if (n1 < 0 || n2 < 0) { + switch (operation) { + case I_LSHIFT: + rv = ucv_int64_new(n1 << n2); + break; - case I_RSHIFT: - rv = ucv_int64_new(n1 >> n2); - break; + case I_RSHIFT: + rv = ucv_int64_new(n1 >> n2); + break; - case I_BAND: - rv = ucv_int64_new(n1 & n2); - break; + case I_BAND: + rv = ucv_int64_new(n1 & n2); + break; - case I_BXOR: - rv = ucv_int64_new(n1 ^ n2); - break; + case I_BXOR: + rv = ucv_int64_new(n1 ^ n2); + break; - case I_BOR: - rv = ucv_int64_new(n1 | n2); - break; + case I_BOR: + rv = ucv_int64_new(n1 | n2); + break; - default: - break; + default: + break; + } + } + else { + if (!u1) u1 = (uint64_t)n1; + if (!u2) u2 = (uint64_t)n2; + + switch (operation) { + case I_LSHIFT: + rv = ucv_uint64_new(u1 << (u2 % (sizeof(uint64_t) * CHAR_BIT))); + break; + + case I_RSHIFT: + rv = ucv_uint64_new(u1 >> (u2 % (sizeof(uint64_t) * CHAR_BIT))); + break; + + case I_BAND: + rv = ucv_uint64_new(u1 & u2); + break; + + case I_BXOR: + rv = ucv_uint64_new(u1 ^ u2); + break; + + case I_BOR: + rv = ucv_uint64_new(u1 | u2); + break; + + default: + break; + } } + ucv_put(nv1); + ucv_put(nv2); + return rv; } static uc_value_t * uc_vm_value_arith(uc_vm_t *vm, uc_vm_insn_t operation, uc_value_t *value, uc_value_t *operand) { - uc_value_t *rv = NULL; - uc_type_t t1, t2; + uc_value_t *nv1, *nv2, *rv = NULL; char *s, *s1, *s2; size_t len1, len2; + uint64_t u1, u2; int64_t n1, n2; double d1, d2; @@ -1318,12 +1378,19 @@ uc_vm_value_arith(uc_vm_t *vm, uc_vm_insn_t operation, uc_value_t *value, uc_val return rv; } - t1 = ucv_cast_number(value, &n1, &d1); - t2 = ucv_cast_number(operand, &n2, &d2); + nv1 = ucv_to_number(value); + nv2 = ucv_to_number(operand); - if (t1 == UC_DOUBLE || t2 == UC_DOUBLE) { - d1 = (t1 == UC_DOUBLE) ? d1 : (double)n1; - d2 = (t2 == UC_DOUBLE) ? d2 : (double)n2; + /* any operation involving NaN results in NaN */ + if (!nv1 || !nv2) { + ucv_put(nv1); + ucv_put(nv2); + + return ucv_double_new(NAN); + } + if (ucv_type(nv1) == UC_DOUBLE || ucv_type(nv2) == UC_DOUBLE) { + d1 = ucv_double_get(nv1); + d2 = ucv_double_get(nv2); switch (operation) { case I_ADD: @@ -1332,6 +1399,7 @@ uc_vm_value_arith(uc_vm_t *vm, uc_vm_insn_t operation, uc_value_t *value, uc_val break; case I_SUB: + case I_MINUS: rv = ucv_double_new(d1 - d2); break; @@ -1352,7 +1420,7 @@ uc_vm_value_arith(uc_vm_t *vm, uc_vm_insn_t operation, uc_value_t *value, uc_val break; case I_MOD: - rv = ucv_double_new(NAN); + rv = ucv_double_new(fmod(d1, d2)); break; default: @@ -1363,30 +1431,100 @@ uc_vm_value_arith(uc_vm_t *vm, uc_vm_insn_t operation, uc_value_t *value, uc_val } } else { + n1 = int64(nv1, &u1); + n2 = int64(nv2, &u2); + switch (operation) { case I_ADD: case I_PLUS: - rv = ucv_int64_new(n1 + n2); + if (n1 < 0 || n2 < 0) { + if (u1) + rv = ucv_uint64_new(u1 - abs64(n2)); + else if (u2) + rv = ucv_uint64_new(u2 - abs64(n1)); + else + rv = ucv_int64_new(n1 + n2); + } + else { + if (!u1) u1 = (uint64_t)n1; + if (!u2) u2 = (uint64_t)n2; + + rv = ucv_uint64_new(u1 + u2); + } + break; case I_SUB: - rv = ucv_int64_new(n1 - n2); + case I_MINUS: + if (n1 < 0 && n2 < 0) { + if (n1 > n2) + rv = ucv_uint64_new(abs64(n2) - abs64(n1)); + else + rv = ucv_int64_new(n1 - n2); + } + else if (n1 >= 0 && n2 >= 0) { + if (!u1) u1 = (uint64_t)n1; + if (!u2) u2 = (uint64_t)n2; + + if (u2 > u1) + rv = ucv_int64_new(-(u2 - u1)); + else + rv = ucv_uint64_new(u1 - u2); + } + else if (n1 >= 0) { + if (!u1) u1 = (uint64_t)n1; + + rv = ucv_uint64_new(u1 + abs64(n2)); + } + else { + rv = ucv_int64_new(n1 - n2); + } + break; case I_MUL: - rv = ucv_int64_new(n1 * n2); + if (n1 < 0 && n2 < 0) { + rv = ucv_uint64_new(abs64(n1) * abs64(n2)); + } + else if (n1 >= 0 && n2 >= 0) { + if (!u1) u1 = (uint64_t)n1; + if (!u2) u2 = (uint64_t)n2; + + rv = ucv_uint64_new(u1 * u2); + } + else { + rv = ucv_int64_new(n1 * n2); + } + break; case I_DIV: - if (n2 == 0) + if (n2 == 0) { rv = ucv_double_new(INFINITY); - else + } + else if (n1 < 0 || n2 < 0) { rv = ucv_int64_new(n1 / n2); + } + else { + if (!u1) u1 = (uint64_t)n1; + if (!u2) u2 = (uint64_t)n2; + + rv = ucv_uint64_new(u1 / u2); + } break; case I_MOD: - rv = ucv_int64_new(n1 % n2); + if (n1 < 0 || n2 < 0) { + rv = ucv_int64_new(n1 % n2); + } + else { + if (!u1) u1 = (uint64_t)n1; + if (!u2) u2 = (uint64_t)n2; + + rv = ucv_uint64_new(u1 % u2); + } + break; default: @@ -1397,6 +1535,9 @@ uc_vm_value_arith(uc_vm_t *vm, uc_vm_insn_t operation, uc_value_t *value, uc_val } } + ucv_put(nv1); + ucv_put(nv2); + return rv; } @@ -1643,25 +1784,71 @@ uc_vm_insn_arith(uc_vm_t *vm, uc_vm_insn_t insn) static void uc_vm_insn_plus_minus(uc_vm_t *vm, uc_vm_insn_t insn) { - uc_value_t *v = uc_vm_stack_pop(vm); + uc_value_t *v = uc_vm_stack_pop(vm), *nv; bool is_sub = (insn == I_MINUS); - uc_type_t t; int64_t n; double d; - t = ucv_cast_number(v, &n, &d); + if (ucv_type(v) == UC_STRING) { + nv = uc_number_parse(ucv_string_get(v), NULL); - ucv_put(v); + if (nv) { + ucv_put(v); + v = nv; + } + } - switch (t) { + switch (ucv_type(v)) { case UC_INTEGER: - uc_vm_stack_push(vm, ucv_int64_new(is_sub ? -n : n)); + n = ucv_int64_get(v); + + /* numeric value is in range 9223372036854775808..18446744073709551615 */ + if (errno == ERANGE) { + if (is_sub) + /* make negation of large numeric value result in smallest negative value */ + uc_vm_stack_push(vm, ucv_int64_new(INT64_MIN)); + else + /* for positive number coercion return value as-is */ + uc_vm_stack_push(vm, ucv_get(v)); + } + + /* numeric value is in range -9223372036854775808..9223372036854775807 */ + else { + if (is_sub) { + if (n == INT64_MIN) + /* make negation of minimum value result in maximum signed positive value */ + uc_vm_stack_push(vm, ucv_int64_new(INT64_MAX)); + else + /* for all other values flip the sign */ + uc_vm_stack_push(vm, ucv_int64_new(-n)); + } + else { + /* for positive number coercion return value as-is */ + uc_vm_stack_push(vm, ucv_get(v)); + } + } + break; - default: + case UC_DOUBLE: + d = ucv_double_get(v); uc_vm_stack_push(vm, ucv_double_new(is_sub ? -d : d)); break; + + case UC_BOOLEAN: + n = (int64_t)ucv_boolean_get(v); + uc_vm_stack_push(vm, ucv_int64_new(is_sub ? -n : n)); + break; + + case UC_NULL: + uc_vm_stack_push(vm, ucv_int64_new(0)); + break; + + default: + uc_vm_stack_push(vm, ucv_double_new(NAN)); } + + ucv_put(v); } static void @@ -1683,15 +1870,24 @@ static void uc_vm_insn_complement(uc_vm_t *vm, uc_vm_insn_t insn) { uc_value_t *v = uc_vm_stack_pop(vm); + uc_value_t *nv; + uint64_t u; int64_t n; - double d; - if (ucv_cast_number(v, &n, &d) == UC_DOUBLE) - n = isnan(d) ? 0 : (int64_t)d; + nv = ucv_to_number(v); + n = int64(nv, &u); - ucv_put(v); + if (n < 0) { + uc_vm_stack_push(vm, ucv_int64_new(~n)); + } + else { + if (!u) u = (uint64_t)n; + + uc_vm_stack_push(vm, ucv_uint64_new(~u)); + } - uc_vm_stack_push(vm, ucv_int64_new(~n)); + ucv_put(nv); + ucv_put(v); } static void @@ -1700,7 +1896,7 @@ uc_vm_insn_rel(uc_vm_t *vm, uc_vm_insn_t insn) uc_value_t *r2 = uc_vm_stack_pop(vm); uc_value_t *r1 = uc_vm_stack_pop(vm); - bool res = ucv_compare(insn, r1, r2); + bool res = ucv_compare(insn, r1, r2, NULL); ucv_put(r1); ucv_put(r2); @@ -1724,7 +1920,7 @@ uc_vm_insn_in(uc_vm_t *vm, uc_vm_insn_t insn) arridx < arrlen; arridx++) { item = ucv_array_get(r2, arridx); - if (ucv_compare(I_EQ, r1, item)) { + if (ucv_compare(I_EQ, r1, item, NULL)) { found = true; break; } |