/* * Binary data packing/unpacking module for ucode. * Copyright (C) 2021 Jo-Philipp Wich * * This module is heavily based on the Python 3.10 "_struct.c" module source * published under the following license: * * ----------------------------------------------------------------------------------- * * 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and * the Individual or Organization ("Licensee") accessing and otherwise using Python * 3.10.0 software in source or binary form and its associated documentation. * * 2. Subject to the terms and conditions of this License Agreement, PSF hereby * grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, * analyze, test, perform and/or display publicly, prepare derivative works, * distribute, and otherwise use Python 3.10.0 alone or in any derivative * version, provided, however, that PSF's License Agreement and PSF's notice of * copyright, i.e., "Copyright © 2001-2021 Python Software Foundation; All Rights * Reserved" are retained in Python 3.10.0 alone or in any derivative version * prepared by Licensee. * * 3. In the event Licensee prepares a derivative work that is based on or * incorporates Python 3.10.0 or any part thereof, and wants to make the * derivative work available to others as provided herein, then Licensee hereby * agrees to include in any such work a brief summary of the changes made to Python * 3.10.0. * * 4. PSF is making Python 3.10.0 available to Licensee on an "AS IS" basis. * PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF * EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR * WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE * USE OF PYTHON 3.10.0 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. * * 5. 
PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 3.10.0 * FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF * MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 3.10.0, OR ANY DERIVATIVE * THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. * * 6. This License Agreement will automatically terminate upon a material breach of * its terms and conditions. * * 7. Nothing in this License Agreement shall be deemed to create any relationship * of agency, partnership, or joint venture between PSF and Licensee. This License * Agreement does not grant permission to use PSF trademarks or trade name in a * trademark sense to endorse or promote products or services of Licensee, or any * third party. * * 8. By copying, installing or otherwise using Python 3.10.0, Licensee agrees * to be bound by the terms and conditions of this License Agreement. * * ----------------------------------------------------------------------------------- * * Brief summary of changes compared to the original Python 3.10 source: * * - Inlined and refactored IEEE 754 float conversion routines * - Usage of stdbool for function return values and boolean parameters * - Renamed functions and structures for clarity * - Interface adapted to ucode C api * - Removed unused code */ /** * # Handle Packed Binary Data * * The `struct` module provides routines for interpreting byte strings as packed * binary data. 
* * Functions can be individually imported and directly accessed using the * {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import#named_import named import} * syntax: * * ``` * import { pack, unpack } from 'struct'; * * let buffer = pack('bhl', -13, 1234, 444555666); * let values = unpack('bhl', buffer); * ``` * * Alternatively, the module namespace can be imported * using a wildcard import statement: * * ``` * import * as struct from 'struct'; * * let buffer = struct.pack('bhl', -13, 1234, 444555666); * let values = struct.unpack('bhl', buffer); * ``` * * Additionally, the struct module namespace may also be imported by invoking * the `ucode` interpreter with the `-lstruct` switch. * * ## Format Strings * * Format strings describe the data layout when packing and unpacking data. * They are built up from format-characters, which specify the type of data * being packed/unpacked. In addition, special characters control the byte * order, size and alignment. * * Each format string consists of an optional prefix character which describes * the overall properties of the data and one or more format characters which * describe the actual data values and padding. * * ### Byte Order, Size, and Alignment * * By default, C types are represented in the machine's native format and byte * order, and properly aligned by skipping pad bytes if necessary (according to * the rules used by the C compiler). * * This behavior is chosen so that the bytes of a packed struct correspond * exactly to the memory layout of the corresponding C struct. * * Whether to use native byte ordering and padding or standard formats depends * on the application. 
* * Alternatively, the first character of the format string can be used to indicate * the byte order, size and alignment of the packed data, according to the * following table: * * | Character | Byte order | Size | Alignment | * |-----------|------------------------|----------|-----------| * | `@` | native | native | native | * | `=` | native | standard | none | * | `<` | little-endian | standard | none | * | `>` | big-endian | standard | none | * | `!` | network (= big-endian) | standard | none | * * If the first character is not one of these, `'@'` is assumed. * * Native byte order is big-endian or little-endian, depending on the * host system. For example, Intel x86, AMD64 (x86-64), and Apple M1 are * little-endian; IBM z and many legacy architectures are big-endian. * * Native size and alignment are determined using the C compiler's * `sizeof` expression. This is always combined with native byte order. * * Standard size depends only on the format character; see the table in * the `format-characters` section. * * Note the difference between `'@'` and `'='`: both use native byte order, * but the size and alignment of the latter is standardized. * * The form `'!'` represents the network byte order which is always big-endian * as defined in `IETF RFC 1700`. * * There is no way to indicate non-native byte order (force byte-swapping); use * the appropriate choice of `'<'` or `'>'`. * * Notes: * * (1) Padding is only automatically added between successive structure members. * No padding is added at the beginning or the end of the encoded struct. * * (2) No padding is added when using non-native size and alignment, e.g. * with '<', '>', '=', and '!'. * * (3) To align the end of a structure to the alignment requirement of a * particular type, end the format with the code for that type with a repeat * count of zero. 
* * * ### Format Characters * * Format characters have the following meaning; the conversion between C and * ucode values should be obvious given their types. The 'Standard size' column * refers to the size of the packed value in bytes when using standard size; * that is, when the format string starts with one of `'<'`, `'>'`, `'!'` or * `'='`. When using native size, the size of the packed value is platform * dependent. * * | Format | C Type | Ucode type | Standard size | Notes | * |--------|----------------------|------------|----------------|----------| * | `x` | *pad byte* | *no value* | | (7) | * | `c` | `char` | string | 1 | | * | `b` | `signed char` | int | 1 | (1), (2) | * | `B` | `unsigned char` | int | 1 | (2) | * | `?` | `_Bool` | bool | 1 | (1) | * | `h` | `short` | int | 2 | (2) | * | `H` | `unsigned short` | int | 2 | (2) | * | `i` | `int` | int | 4 | (2) | * | `I` | `unsigned int` | int | 4 | (2) | * | `l` | `long` | int | 4 | (2) | * | `L` | `unsigned long` | int | 4 | (2) | * | `q` | `long long` | int | 8 | (2) | * | `Q` | `unsigned long long` | int | 8 | (2) | * | `n` | `ssize_t` | int | | (3) | * | `N` | `size_t` | int | | (3) | * | `e` | (6) | double | 2 | (4) | * | `f` | `float` | double | 4 | (4) | * | `d` | `double` | double | 8 | (4) | * | `s` | `char[]` | string | | (9) | * | `p` | `char[]` | string | | (8) | * | `P` | `void *` | int | | (5) | * | `*` | `char[]` | string | | (10) | * * Notes: * * - (1) The `'?'` conversion code corresponds to the `_Bool` type defined by * C99. If this type is not available, it is simulated using a `char`. In * standard mode, it is always represented by one byte. * * - (2) When attempting to pack a non-integer using any of the integer * conversion codes, this module attempts to convert the given value into an * integer. If the value is not convertible, a type error exception is thrown. 
* * - (3) The `'n'` and `'N'` conversion codes are only available for the native * size (selected as the default or with the `'@'` byte order character). * For the standard size, you can use whichever of the other integer formats * fits your application. * * - (4) For the `'f'`, `'d'` and `'e'` conversion codes, the packed * representation uses the IEEE 754 binary32, binary64 or binary16 format * (for `'f'`, `'d'` or `'e'` respectively), regardless of the floating-point * format used by the platform. * * - (5) The `'P'` format character is only available for the native byte * ordering (selected as the default or with the `'@'` byte order character). * The byte order character `'='` chooses to use little- or big-endian * ordering based on the host system. The struct module does not interpret * this as native ordering, so the `'P'` format is not available. * * - (6) The IEEE 754 binary16 "half precision" type was introduced in the 2008 * revision of the `IEEE 754` standard. It has a sign bit, a 5-bit exponent * and 11-bit precision (with 10 bits explicitly stored), and can represent * numbers between approximately `6.1e-05` and `6.5e+04` at full precision. * This type is not widely supported by C compilers: on a typical machine, an * unsigned short can be used for storage, but not for math operations. See * the Wikipedia page on the `half-precision floating-point format` for more * information. * * - (7) When packing, `'x'` inserts one NUL byte. * * - (8) The `'p'` format character encodes a "Pascal string", meaning a short * variable-length string stored in a *fixed number of bytes*, given by the * count. The first byte stored is the length of the string, or 255, * whichever is smaller. The bytes of the string follow. If the string * passed in to `pack()` is too long (longer than the count minus 1), only * the leading `count-1` bytes of the string are stored. 
If the string is * shorter than `count-1`, it is padded with null bytes so that exactly count * bytes in all are used. Note that for `unpack()`, the `'p'` format * character consumes `count` bytes, but that the string returned can never * contain more than 255 bytes. * * - (9) For the `'s'` format character, the count is interpreted as the length * of the bytes, not a repeat count like for the other format characters; for * example, `'10s'` means a single 10-byte string mapping to or from a single * ucode byte string, while `'10c'` means 10 separate one byte character * elements (e.g., `cccccccccc`) mapping to or from ten different ucode byte * strings. If a count is not given, it defaults to 1. For packing, the * string is truncated or padded with null bytes as appropriate to make it * fit. For unpacking, the resulting bytes object always has exactly the * specified number of bytes. As a special case, `'0s'` means a single, * empty string (while `'0c'` means 0 characters). * * - (10) The `*` format character serves as wildcard. For `pack()` it will * append the corresponding byte argument string as-is, not applying any * padding or zero filling. When a repeat count is given, that many bytes of * the input byte string argument will be appended at most on `pack()`, * effectively truncating longer input strings. For `unpack()`, the wildcard * format will yield a byte string containing the entire remaining input data * bytes, or - when a repeat count is given - that many bytes of input data * at most. * * A format character may be preceded by an integral repeat count. For example, * the format string `'4h'` means exactly the same as `'hhhh'`. * * Whitespace characters between formats are ignored; a count and its format * must not contain whitespace though. 
* * When packing a value `x` using one of the integer formats (`'b'`, * `'B'`, `'h'`, `'H'`, `'i'`, `'I'`, `'l'`, `'L'`, * `'q'`, `'Q'`), if `x` is outside the valid range for that format, a type * error exception is raised. * * For the `'?'` format character, the return value is either `true` or `false`. * When packing, the truish result value of the argument is used. Either 0 or 1 * in the native or standard bool representation will be packed, and any * non-zero value will be `true` when unpacking. * * ## Examples * * Note: * Native byte order examples (designated by the `'@'` format prefix or * lack of any prefix character) may not match what the reader's * machine produces as * that depends on the platform and compiler. * * Pack and unpack integers of three different sizes, using big endian * ordering: * * ``` * import { pack, unpack } from 'struct'; * * pack(">bhl", 1, 2, 3); // "\x01\x00\x02\x00\x00\x00\x03" * unpack(">bhl", "\x01\x00\x02\x00\x00\x00\x03"); // [ 1, 2, 3 ] * ``` * * Attempt to pack an integer which is too large for the defined field: * * ```bash * $ ucode -lstruct -p 'struct.pack(">h", 99999)' * Type error: Format 'h' requires numeric argument between -32768 and 32767 * In [-p argument], line 1, byte 24: * * `struct.pack(">h", 99999)` * Near here -------------^ * ``` * * Demonstrate the difference between `'s'` and `'c'` format characters: * * ``` * import { pack } from 'struct'; * * pack("@ccc", "1", "2", "3"); // "123" * pack("@3s", "123"); // "123" * ``` * * The ordering of format characters may have an impact on size in native * mode since padding is implicit. In standard mode, the user is * responsible for inserting any desired padding. * * Note in the first `pack()` call below that three NUL bytes were added after * the packed `'#'` to align the following integer on a four-byte boundary. 
* In this example, the output was produced on a little endian machine: * * ``` * import { pack } from 'struct'; * * pack("@ci", "#", 0x12131415); // "#\x00\x00\x00\x15\x14\x13\x12" * pack("@ic", 0x12131415, "#"); // "\x15\x14\x13\x12#" * ``` * * The following format `'ih0i'` results in two pad bytes being added at the * end, assuming the platform's ints are aligned on 4-byte boundaries: * * ``` * import { pack } from 'struct'; * * pack("ih0i", 0x01010101, 0x0202); // "\x01\x01\x01\x01\x02\x02\x00\x00" * ``` * * Use the wildcard format to extract the remainder of the input data: * * ``` * import { unpack } from 'struct'; * * unpack("ccc*", "foobarbaz"); // [ "f", "o", "o", "barbaz" ] * unpack("ccc3*", "foobarbaz"); // [ "f", "o", "o", "bar" ] * ``` * * Use the wildcard format to pack binary stings as-is into the result data: * * ``` * import { pack } from 'struct'; * * pack("h*h", 0x0101, "\x02\x00\x03", 0x0404); // "\x01\x01\x02\x00\x03\x04\x04" * pack("c3*c", "a", "foobar", "c"); // "afooc" * ``` * * @module struct */ #include #include #include #include #include #include #include #include "ucode/module.h" #include "ucode/vallist.h" static uc_resource_type_t *struct_type; typedef struct formatdef { char format; ssize_t size; ssize_t alignment; uc_value_t* (*unpack)(uc_vm_t *, const char *, const struct formatdef *); bool (*pack)(uc_vm_t *, char *, uc_value_t *, const struct formatdef *); } formatdef_t; typedef struct { const formatdef_t *fmtdef; ssize_t offset; ssize_t size; ssize_t repeat; } formatcode_t; typedef struct { size_t len; size_t size; size_t ncodes; formatcode_t codes[]; } formatstate_t; /* Define various structs to figure out the alignments of types */ typedef struct { char c; short x; } st_short; typedef struct { char c; int x; } st_int; typedef struct { char c; long x; } st_long; typedef struct { char c; float x; } st_float; typedef struct { char c; double x; } st_double; typedef struct { char c; void *x; } st_void_p; typedef struct { char c; size_t 
x; } st_size_t; typedef struct { char c; bool x; } st_bool; typedef struct { char c; long long x; } s_long_long; #define SHORT_ALIGN (sizeof(st_short) - sizeof(short)) #define INT_ALIGN (sizeof(st_int) - sizeof(int)) #define LONG_ALIGN (sizeof(st_long) - sizeof(long)) #define FLOAT_ALIGN (sizeof(st_float) - sizeof(float)) #define DOUBLE_ALIGN (sizeof(st_double) - sizeof(double)) #define VOID_P_ALIGN (sizeof(st_void_p) - sizeof(void *)) #define SIZE_T_ALIGN (sizeof(st_size_t) - sizeof(size_t)) #define BOOL_ALIGN (sizeof(st_bool) - sizeof(bool)) #define LONG_LONG_ALIGN (sizeof(s_long_long) - sizeof(long long)) #ifdef __powerc #pragma options align=reset #endif static bool ucv_as_long(uc_vm_t *vm, uc_value_t *v, long *p) { char *s, *e; int64_t i; double d; long x; errno = 0; switch (ucv_type(v)) { case UC_INTEGER: i = ucv_int64_get(v); if (i < LONG_MIN || i > LONG_MAX) errno = ERANGE; x = (long)i; break; case UC_DOUBLE: d = ucv_double_get(v); x = (long)d; if (isnan(d) || d < (double)LONG_MIN || d > (double)LONG_MAX || d - x != 0) errno = ERANGE; break; case UC_BOOLEAN: x = (long)ucv_boolean_get(v); break; case UC_NULL: x = 0; break; case UC_STRING: s = ucv_string_get(v); x = strtol(s, &e, 0); if (e == s || *e != '\0') errno = EINVAL; break; default: errno = EINVAL; x = 0; break; } if (errno != 0) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, (errno == ERANGE) ? 
"Argument out of range" : "Argument not convertible to number"); return false; } *p = x; return true; } static bool ucv_as_ulong(uc_vm_t *vm, uc_value_t *v, unsigned long *p) { unsigned long x; char *s, *e; uint64_t i; double d; errno = 0; switch (ucv_type(v)) { case UC_INTEGER: i = ucv_uint64_get(v); if (i > ULONG_MAX) errno = ERANGE; x = (unsigned long)i; break; case UC_DOUBLE: d = ucv_double_get(v); x = (unsigned long)d; if (isnan(d) || d < 0 || d > (double)ULONG_MAX || d - x != 0) errno = ERANGE; break; case UC_BOOLEAN: x = (unsigned long)ucv_boolean_get(v); break; case UC_NULL: x = 0; break; case UC_STRING: s = ucv_string_get(v); x = strtoul(s, &e, 0); if (e == s || *e != '\0') errno = EINVAL; break; default: errno = EINVAL; x = 0; break; } if (errno != 0) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, (errno == ERANGE) ? "Argument out of range" : "Argument not convertible to number"); return false; } *p = x; return true; } static bool ucv_as_longlong(uc_vm_t *vm, uc_value_t *v, long long *p) { char *s, *e; long long x; int64_t i; double d; errno = 0; switch (ucv_type(v)) { case UC_INTEGER: i = ucv_int64_get(v); if (i < LLONG_MIN || i > LLONG_MAX) errno = ERANGE; x = (long long)i; break; case UC_DOUBLE: d = ucv_double_get(v); x = (long long)d; if (isnan(d) || d < (double)LLONG_MIN || d > (double)LLONG_MAX || d - x != 0) errno = ERANGE; break; case UC_BOOLEAN: x = (long long)ucv_boolean_get(v); break; case UC_NULL: x = 0; break; case UC_STRING: s = ucv_string_get(v); x = strtoll(s, &e, 0); if (e == s || *e != '\0') errno = EINVAL; break; default: errno = EINVAL; x = 0; break; } if (errno != 0) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, (errno == ERANGE) ? 
"Argument out of range" : "Argument not convertible to number"); return false; } *p = x; return true; } static bool ucv_as_ulonglong(uc_vm_t *vm, uc_value_t *v, unsigned long long *p) { unsigned long long x; char *s, *e; uint64_t i; double d; errno = 0; switch (ucv_type(v)) { case UC_INTEGER: i = ucv_uint64_get(v); if (i > ULLONG_MAX) errno = ERANGE; x = (unsigned long long)i; break; case UC_DOUBLE: d = ucv_double_get(v); x = (unsigned long long)d; if (isnan(d) || d < 0 || d > (double)ULLONG_MAX || d - x != 0) errno = ERANGE; break; case UC_BOOLEAN: x = (unsigned long long)ucv_boolean_get(v); break; case UC_NULL: x = 0; break; case UC_STRING: s = ucv_string_get(v); x = strtoull(s, &e, 0); if (e == s || *e != '\0') errno = EINVAL; break; default: errno = EINVAL; x = 0; break; } if (errno != 0) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, (errno == ERANGE) ? "Argument out of range" : "Argument not convertible to number"); return false; } *p = x; return true; } static bool ucv_as_ssize_t(uc_vm_t *vm, uc_value_t *v, ssize_t *p) { char *s, *e; int64_t i; ssize_t x; double d; errno = 0; switch (ucv_type(v)) { case UC_INTEGER: i = ucv_int64_get(v); if (i < -1 || i > SSIZE_MAX) errno = ERANGE; x = (ssize_t)i; break; case UC_DOUBLE: d = ucv_double_get(v); x = (ssize_t)d; if (isnan(d) || d < -1 || d > (double)SSIZE_MAX || d - x != 0) errno = ERANGE; break; case UC_BOOLEAN: x = (ssize_t)ucv_boolean_get(v); break; case UC_NULL: x = 0; break; case UC_STRING: s = ucv_string_get(v); i = strtoll(s, &e, 0); if (e == s || *e != '\0') errno = EINVAL; else if (i < -1 || i > SSIZE_MAX) errno = ERANGE; x = (ssize_t)i; break; default: errno = EINVAL; x = 0; break; } if (errno != 0) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, (errno == ERANGE) ? 
"Argument out of range" : "Argument not convertible to number"); return false; } *p = x; return true; } /* Same, but handling size_t */ static bool ucv_as_size_t(uc_vm_t *vm, uc_value_t *v, size_t *p) { char *s, *e; uint64_t i; double d; size_t x; errno = 0; switch (ucv_type(v)) { case UC_INTEGER: i = ucv_uint64_get(v); if (i > SIZE_MAX) errno = ERANGE; x = (size_t)i; break; case UC_DOUBLE: d = ucv_double_get(v); x = (size_t)d; if (isnan(d) || d < 0 || d > (double)SIZE_MAX || d - x != 0) errno = ERANGE; break; case UC_BOOLEAN: x = (size_t)ucv_boolean_get(v); break; case UC_NULL: x = 0; break; case UC_STRING: s = ucv_string_get(v); i = strtoull(s, &e, 0); if (e == s || *e != '\0') errno = EINVAL; else if (i > SIZE_MAX) errno = ERANGE; x = (size_t)i; break; default: errno = EINVAL; x = 0; break; } if (errno != 0) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, (errno == ERANGE) ? "Argument out of range" : "Argument not convertible to number"); return false; } *p = x; return true; } static bool ucv_as_double(uc_vm_t *vm, uc_value_t *v, double *p) { char *s, *e; int64_t i; double x; errno = 0; switch (ucv_type(v)) { case UC_INTEGER: i = ucv_int64_get(v); if (errno == 0) { if (i < -DBL_MAX || i > DBL_MAX) errno = ERANGE; } x = (double)i; break; case UC_DOUBLE: x = ucv_double_get(v); break; case UC_BOOLEAN: x = (double)ucv_boolean_get(v); break; case UC_NULL: x = 0.0; break; case UC_STRING: s = ucv_string_get(v); x = strtod(s, &e); if (e == s || *e != '\0') errno = EINVAL; break; default: errno = EINVAL; x = 0.0; break; } if (errno != 0) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, (errno == ERANGE) ? 
"Argument out of range" : "Argument not convertible to number");

		return false;
	}

	*p = x;

	return true;
}


/* Floating point helpers */

/*
 * Encode `d` as an IEEE 754 binary16 value into the two bytes at `buf`.
 *
 * With `little_endian` set, the byte carrying sign and exponent is written
 * to buf[1] and the low fraction byte to buf[0]; otherwise the order is
 * reversed.
 *
 * Returns true on success. Returns false with errno set to ERANGE when a
 * finite value is too large for the format (nothing is written then).
 */
static bool
double_pack16(double d, char *buf, bool little_endian)
{
	int32_t exponent = 0;
	uint16_t bits = 0;
	bool sign = false;
	double fraction;
	uint8_t *p;

	if (d == 0.0) {
		/* copysign() distinguishes -0.0 from +0.0 */
		sign = (copysign(1.0, d) == -1.0);
	}
	else if (isnan(d)) {
		/* all-ones exponent with the top fraction bit set */
		sign = (copysign(1.0, d) == -1.0);
		exponent = 0x1f;
		bits = 512;
	}
	else if (!isfinite(d)) {
		/* infinity: all-ones exponent, zero fraction */
		sign = (d < 0.0);
		exponent = 0x1f;
	}
	else {
		if (d < 0.0) {
			sign = true;
			d = -d;
		}

		fraction = frexp(d, &exponent);

		assert(fraction >= 0.5 && fraction < 1.0);

		/* normalize fraction to the range [1.0, 2.0) */
		fraction *= 2.0;
		exponent--;

		if (exponent >= 16) {
			/* too large for the format */
			errno = ERANGE;

			return false;
		}
		else if (exponent < -25) {
			/* too small to be represented: becomes zero */
			fraction = 0.0;
			exponent = 0;
		}
		else if (exponent < -14) {
			/* subnormal: scale fraction down, stored exponent is 0 */
			fraction = ldexp(fraction, 14 + exponent);
			exponent = 0;
		}
		else {
			/* normal: drop the implicit leading 1 and apply the bias */
			fraction -= 1.0;
			exponent += 15;
		}

		/* scale to the 10 stored fraction bits */
		fraction *= 1024.0;
		bits = (uint16_t)fraction;

		assert(bits < 1024);
		assert(exponent < 31);

		/* round to nearest, ties to even */
		if ((fraction - bits > 0.5) || ((fraction - bits == 0.5) && (bits % 2))) {
			if (++bits == 1024) {
				/* the fraction overflowed: carry into the exponent */
				bits = 0;

				if (++exponent == 31) {
					errno = ERANGE;

					return false;
				}
			}
		}
	}

	bits |= (exponent << 10) | (sign << 15);

	/* write the high byte first, then step to the neighbouring byte */
	p = (uint8_t *)buf + little_endian;
	*p = (bits >> 8) & 0xff;

	p += (little_endian ? -1 : 1);
	*p = bits & 0xff;

	return true;
}

/*
 * Encode `d` as an IEEE 754 binary32 value into the four bytes at `buf`.
 *
 * With `little_endian` set, the byte carrying the sign is written to buf[3]
 * and the remaining bytes follow at descending addresses; otherwise writing
 * starts at buf[0] and ascends.
 *
 * Returns true on success. Returns false with errno set to ERANGE when a
 * finite value is too large for the format.
 */
static bool
double_pack32(double d, char *buf, bool little_endian)
{
	int8_t step = little_endian ?
-1 : 1;
	int32_t exponent = 0;
	uint32_t bits = 0;
	bool sign = false;
	double fraction;
	uint8_t *p;

	if (d == 0.0) {
		/* copysign() distinguishes -0.0 from +0.0 */
		sign = (copysign(1.0, d) == -1.0);
	}
	else if (isnan(d)) {
		/* all-ones exponent, all fraction bits set */
		sign = (copysign(1.0, d) == -1.0);
		exponent = 0xff;
		bits = 0x7fffff;
	}
	else if (!isfinite(d)) {
		/* infinity: all-ones exponent, zero fraction */
		sign = (d < 0.0);
		exponent = 0xff;
	}
	else {
		if (d < 0.0) {
			sign = true;
			d = -d;
		}

		fraction = frexp(d, &exponent);

		if (fraction == 0.0) {
			exponent = 0;
		}
		else {
			assert(fraction >= 0.5 && fraction < 1.0);

			/* normalize fraction to the range [1.0, 2.0) */
			fraction *= 2.0;
			exponent--;
		}

		if (exponent >= 128) {
			/* too large for the format */
			errno = ERANGE;

			return false;
		}
		else if (exponent < -126) {
			/* subnormal: scale fraction down, stored exponent is 0 */
			fraction = ldexp(fraction, 126 + exponent);
			exponent = 0;
		}
		else if (exponent != 0 || fraction != 0.0) {
			/* normal: drop the implicit leading 1 and apply the bias */
			fraction -= 1.0;
			exponent += 127;
		}

		/* scale to the 23 stored fraction bits, rounding by adding 0.5 */
		fraction *= 8388608.0;
		bits = (uint32_t)(fraction + 0.5);

		assert(bits <= 8388608);

		if (bits >> 23) {
			/* rounding overflowed the fraction: carry into the exponent */
			bits = 0;

			if (++exponent >= 255) {
				errno = ERANGE;

				return false;
			}
		}
	}

	/* start at the byte holding the sign and walk towards the low byte */
	p = (uint8_t *)buf + (little_endian ? 3 : 0);
	*p = (sign << 7) | (exponent >> 1);

	p += step;
	*p = ((exponent & 1) << 7) | (bits >> 16);

	p += step;
	*p = (bits >> 8) & 0xff;

	p += step;
	*p = bits & 0xff;

	return true;
}

/* 64 bit packing is delegated to uc_double_pack, which is defined elsewhere */
#define double_pack64 uc_double_pack

/*
 * Decode the IEEE 754 binary16 value stored in the two bytes at `buf`.
 *
 * With `little_endian` set, the byte carrying sign and exponent is read from
 * buf[1], otherwise from buf[0]. An all-ones exponent yields +/-INFINITY for
 * a zero fraction and +/-NAN otherwise.
 */
static double
double_unpack16(const char *buf, bool little_endian)
{
	uint32_t fraction;
	int32_t exponent;
	uint8_t *p;
	bool sign;
	double d;

	/* high byte: 1 sign bit, 5 exponent bits, top 2 fraction bits */
	p = (uint8_t *)buf + little_endian;
	sign = (*p >> 7) & 1;
	exponent = (*p & 0x7c) >> 2;
	fraction = (*p & 0x03) << 8;

	/* low byte: remaining 8 fraction bits */
	p += little_endian ? -1 : 1;
	fraction |= *p;

	if (exponent == 0x1f) {
		if (fraction == 0)
			return sign ? -INFINITY : INFINITY;
		else
			return sign ? -NAN : NAN;
	}

	d = (double)fraction / 1024.0;

	if (exponent == 0) {
		/* subnormal: no implicit leading 1 */
		exponent = -14;
	}
	else {
		/* normal: remove the bias and add the implicit leading 1 */
		exponent -= 15;
		d += 1.0;
	}

	d = ldexp(d, exponent);

	return sign ? -d : d;
}

/*
 * Decode the IEEE 754 binary32 value stored in the four bytes at `buf`.
 *
 * With `little_endian` set, reading starts at buf[3] and descends, otherwise
 * it starts at buf[0] and ascends. An all-ones exponent yields +/-INFINITY
 * for a zero fraction and +/-NAN otherwise.
 */
static double
double_unpack32(const char *buf, bool little_endian)
{
	int8_t step = little_endian ? -1 : 1;
	uint32_t fraction;
	int32_t exponent;
	uint8_t *p;
	bool sign;
	double d;

	/* start at the byte holding the sign bit */
	p = (uint8_t *)buf + (little_endian ?
3 : 0);
	/* first byte: sign bit and upper 7 exponent bits */
	sign = (*p >> 7) & 1;
	exponent = (*p & 0x7f) << 1;

	/* second byte: lowest exponent bit and upper 7 fraction bits */
	p += step;
	exponent |= (*p >> 7) & 1;
	fraction = (*p & 0x7f) << 16;

	p += step;
	fraction |= *p << 8;

	p += step;
	fraction |= *p;

	if (exponent == 0xff) {
		if (fraction == 0)
			return sign ? -INFINITY : INFINITY;
		else
			return sign ? -NAN : NAN;
	}

	d = (double)fraction / 8388608.0;

	if (exponent == 0) {
		/* subnormal: no implicit leading 1 */
		exponent = -126;
	}
	else {
		/* normal: remove the bias and add the implicit leading 1 */
		exponent -= 127;
		d += 1.0;
	}

	d = ldexp(d, exponent);

	return sign ? -d : d;
}

/* 64 bit unpacking is delegated to uc_double_unpack, defined elsewhere */
#define double_unpack64 uc_double_unpack

/*
 * Raise a type exception on `vm` stating the valid numeric range for the
 * format `f`, derived from f->size. `is_unsigned` selects between the
 * unsigned range (0 .. max) and the signed range (min .. max).
 *
 * Always returns false so that callers can `return range_exception(...)`.
 */
static bool
range_exception(uc_vm_t *vm, const formatdef_t *f, bool is_unsigned)
{
	/* ulargest is the largest unsigned value with f->size bytes.
	 * Note that the simpler:
	 *   ((size_t)1 << (f->size * 8)) - 1
	 * doesn't work when f->size == sizeof(size_t) because C doesn't
	 * define what happens when a left shift count is >= the number of
	 * bits in the integer being shifted; e.g., on some boxes it doesn't
	 * shift at all when they're equal.
	 */
	const size_t ulargest = (size_t)-1 >> ((sizeof(size_t) - f->size)*8);

	assert(f->size >= 1 && f->size <= (ssize_t)sizeof(size_t));

	if (is_unsigned) {
		uc_vm_raise_exception(vm, EXCEPTION_TYPE,
			"Format '%c' requires numeric argument between 0 and %zu",
			f->format,
			ulargest);
	}
	else {
		/* signed maximum is half the unsigned one; ~largest is the minimum */
		const ssize_t largest = (ssize_t)(ulargest >> 1);

		uc_vm_raise_exception(vm, EXCEPTION_TYPE,
			"Format '%c' requires numeric argument between %zd and %zd",
			f->format,
			~ largest,
			largest);
	}

	return false;
}


/* Native mode routines. 
****************************************************/

/*
 * The native_unpack_*() callbacks below share one signature: `p` points at
 * the packed bytes of a single item and the return value is a newly created
 * ucode value. The `vm` and `f` arguments are not used by them. Multi-byte
 * values are copied out with memcpy(), so `p` does not need to be aligned.
 */

/* Unpack a single byte as a one character string. */
static uc_value_t *
native_unpack_char(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	return ucv_string_new_length(p, 1);
}

/* Unpack a `signed char` as integer. */
static uc_value_t *
native_unpack_byte(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	return ucv_int64_new(*(signed char *)p);
}

/* Unpack an `unsigned char` as integer. */
static uc_value_t *
native_unpack_ubyte(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	return ucv_uint64_new(*(unsigned char *)p);
}

/* Unpack a native `short` as integer. */
static uc_value_t *
native_unpack_short(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	short x = 0;

	memcpy(&x, p, sizeof(x));

	return ucv_int64_new(x);
}

/* Unpack a native `unsigned short` as integer. */
static uc_value_t *
native_unpack_ushort(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	unsigned short x = 0;

	memcpy(&x, p, sizeof(x));

	return ucv_uint64_new(x);
}

/* Unpack a native `int` as integer. */
static uc_value_t *
native_unpack_int(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	int x = 0;

	memcpy(&x, p, sizeof(x));

	return ucv_int64_new(x);
}

/* Unpack a native `unsigned int` as integer. */
static uc_value_t *
native_unpack_uint(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	unsigned int x = 0;

	memcpy(&x, p, sizeof(x));

	return ucv_uint64_new(x);
}

/* Unpack a native `long` as integer. */
static uc_value_t *
native_unpack_long(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	long x = 0;

	memcpy(&x, p, sizeof(x));

	return ucv_int64_new(x);
}

/* Unpack a native `unsigned long` as integer. */
static uc_value_t *
native_unpack_ulong(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	unsigned long x = 0;

	memcpy(&x, p, sizeof(x));

	return ucv_uint64_new(x);
}

/* Unpack a native `ssize_t` as integer. */
static uc_value_t *
native_unpack_ssize_t(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	ssize_t x = 0;

	memcpy(&x, p, sizeof(x));

	return ucv_int64_new(x);
}

/* Unpack a native `size_t` as integer. */
static uc_value_t *
native_unpack_size_t(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	size_t x = 0;

	memcpy(&x, p, sizeof(x));

	return ucv_uint64_new(x);
}

/* Unpack a native `long long` as integer. */
static uc_value_t *
native_unpack_longlong(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	long long x = 0;

	memcpy(&x, p, sizeof(x));

	return ucv_int64_new(x);
}

/* Unpack a native `unsigned long long` as integer. */
static uc_value_t *
native_unpack_ulonglong(uc_vm_t *vm, const char *p, const
formatdef_t *f)
{
	unsigned long long x = 0;

	memcpy(&x, p, sizeof(x));

	return ucv_uint64_new(x);
}

/* Unpack a native `bool`; any non-zero value yields true. */
static uc_value_t *
native_unpack_bool(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	bool x = false;

	memcpy(&x, p, sizeof(x));

	return ucv_boolean_new(x != 0);
}

/* Unpack a half precision float stored in the byte order selected at
 * compile time through __BYTE_ORDER. */
static uc_value_t *
native_unpack_halffloat(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
	return ucv_double_new(double_unpack16(p, true));
#else
	return ucv_double_new(double_unpack16(p, false));
#endif
}

/* Unpack a native `float` as double value. */
static uc_value_t *
native_unpack_float(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	float x = 0.0;

	memcpy(&x, p, sizeof(x));

	return ucv_double_new(x);
}

/* Unpack a native `double`. */
static uc_value_t *
native_unpack_double(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	double x = 0.0;

	memcpy(&x, p, sizeof(x));

	return ucv_double_new(x);
}

/* Unpack a native pointer; its value is returned as signed integer. */
static uc_value_t *
native_unpack_void_p(uc_vm_t *vm, const char *p, const formatdef_t *f)
{
	void *x = NULL;

	memcpy(&x, p, sizeof(x));

	return ucv_int64_new((intptr_t)x);
}

/*
 * The native_pack_*() callbacks below share one signature: the value `v` is
 * converted and written to the bytes at `p`. They return true on success and
 * false after a failed conversion or range check.
 */

/* Pack `v` as `signed char`; raises a type exception unless the converted
 * value lies within -128..127. */
static bool
native_pack_byte(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f)
{
	long x = 0;

	if (!ucv_as_long(vm, v, &x))
		return false;

	if (x < -128 || x > 127) {
		uc_vm_raise_exception(vm, EXCEPTION_TYPE,
			"Byte format requires numeric value between -128 and 127");

		return false;
	}

	*p = (char)x;

	return true;
}

/* Pack `v` as `unsigned char`; raises a type exception unless the converted
 * value lies within 0..255. */
static bool
native_pack_ubyte(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f)
{
	long x = 0;

	if (!ucv_as_long(vm, v, &x))
		return false;

	if (x < 0 || x > 255) {
		uc_vm_raise_exception(vm, EXCEPTION_TYPE,
			"Unsigned byte format requires numeric value between 0 and 255");

		return false;
	}

	*(unsigned char *)p = (unsigned char)x;

	return true;
}

/* Pack the first byte of `v`: taken directly from a string value, otherwise
 * from the temporary string produced by ucv_to_string(). Always returns
 * true.
 * NOTE(review): the result of ucv_to_string() is dereferenced without a
 * NULL check - confirm that it cannot return NULL. */
static bool
native_pack_char(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f)
{
	char *s = NULL;

	if (ucv_type(v) == UC_STRING) {
		s = ucv_string_get(v);
		*p = *s;
	}
	else {
		s = ucv_to_string(vm, v);
		*p = *s;
		free(s);
	}

	return true;
}

/* Pack `v` as native `short`; raises a type exception unless the converted
 * value lies within SHRT_MIN..SHRT_MAX. */
static bool
native_pack_short(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t
*f)
{
	long x = 0;
	short y = 0;

	if (!ucv_as_long(vm, v, &x))
		return false;

	if (x < SHRT_MIN || x > SHRT_MAX) {
		uc_vm_raise_exception(vm, EXCEPTION_TYPE,
			"Short format requires numeric value between %d and %d",
			(int)SHRT_MIN, (int)SHRT_MAX);

		return false;
	}

	y = (short)x;
	memcpy(p, &y, sizeof(y));

	return true;
}

/* Pack `v` as native `unsigned short`; raises a type exception unless the
 * converted value lies within 0..USHRT_MAX. */
static bool
native_pack_ushort(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f)
{
	unsigned short y = 0;
	long x = 0;

	if (!ucv_as_long(vm, v, &x))
		return false;

	if (x < 0 || x > USHRT_MAX) {
		uc_vm_raise_exception(vm, EXCEPTION_TYPE,
			"Unsigned short format requires numeric value between 0 and %u",
			(unsigned int)USHRT_MAX);

		return false;
	}

	y = (unsigned short)x;
	memcpy(p, &y, sizeof(y));

	return true;
}

/* Pack `v` as native `int`. The explicit range check is only needed when
 * `long` is wider than `int`. */
static bool
native_pack_int(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f)
{
	long x = 0;
	int y = 0;

	if (!ucv_as_long(vm, v, &x))
		return false;

	if (sizeof(long) > sizeof(int)) {
		if ((x < ((long)INT_MIN)) || (x > ((long)INT_MAX)))
			return range_exception(vm, f, false);
	}

	y = (int)x;
	memcpy(p, &y, sizeof(y));

	return true;
}

/* Pack `v` as native `unsigned int`. The explicit range check is only
 * needed when `long` is wider than `int`. */
static bool
native_pack_uint(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f)
{
	unsigned long x = 0;
	unsigned int y = 0;

	if (!ucv_as_ulong(vm, v, &x))
		return false;

	if (sizeof(long) > sizeof(int)) {
		if (x > ((unsigned long)UINT_MAX))
			return range_exception(vm, f, true);
	}

	y = (unsigned int)x;
	memcpy(p, &y, sizeof(y));

	return true;
}

/* Pack `v` as native `long`. */
static bool
native_pack_long(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f)
{
	long x = 0;

	if (!ucv_as_long(vm, v, &x))
		return false;

	memcpy(p, &x, sizeof(x));

	return true;
}

/* Pack `v` as native `unsigned long`. */
static bool
native_pack_ulong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f)
{
	unsigned long x = 0;

	if (!ucv_as_ulong(vm, v, &x))
		return false;

	memcpy(p, &x, sizeof(x));

	return true;
}

/* Pack `v` as native `ssize_t`. */
static bool
native_pack_ssize_t(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f)
{
	ssize_t x = 0;

	if (!ucv_as_ssize_t(vm, v, &x))
		return false;

	memcpy(p, &x, sizeof(x));

	return true;
}

/* Pack `v` as native `size_t`. */
static bool
native_pack_size_t(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { size_t x = 0; if (!ucv_as_size_t(vm, v, &x)) return false; memcpy(p, &x, sizeof(x)); return true; } static bool native_pack_longlong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { long long x = 0; if (!ucv_as_longlong(vm, v, &x)) return false; memcpy(p, &x, sizeof(x)); return true; } static bool native_pack_ulonglong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { unsigned long long x = 0; if (!ucv_as_ulonglong(vm, v, &x)) return false; memcpy(p, &x, sizeof(x)); return true; } static bool native_pack_bool(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { bool x = 0; x = ucv_is_truish(v); memcpy(p, &x, sizeof(x)); return true; } static bool native_pack_halffloat(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { double x; if (!ucv_as_double(vm, v, &x)) return false; #if __BYTE_ORDER == __LITTLE_ENDIAN return double_pack16(x, p, true); #else return double_pack16(x, p, false); #endif } static bool native_pack_float(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { double d = 0.0; float x = 0.0; if (!ucv_as_double(vm, v, &d)) return false; x = (float)d; memcpy(p, &x, sizeof(x)); return true; } static bool native_pack_double(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { double x = 0.0; if (!ucv_as_double(vm, v, &x)) return false; memcpy(p, &x, sizeof(x)); return true; } static bool native_pack_void_p(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { long long int i = 0; void *x = NULL; if (!ucv_as_longlong(vm, v, &i)) return false; x = (void *)(intptr_t)i; memcpy(p, &x, sizeof(x)); return true; } static const formatdef_t native_endian_table[] = { { 'x', sizeof(char), 0, NULL, NULL }, { 'b', sizeof(char), 0, native_unpack_byte, native_pack_byte }, { 'B', sizeof(char), 0, native_unpack_ubyte, native_pack_ubyte }, { 'c', sizeof(char), 0, native_unpack_char, native_pack_char }, { '*', sizeof(char), 0, NULL, NULL 
}, { 's', sizeof(char), 0, NULL, NULL }, { 'p', sizeof(char), 0, NULL, NULL }, { 'h', sizeof(short), SHORT_ALIGN, native_unpack_short, native_pack_short }, { 'H', sizeof(short), SHORT_ALIGN, native_unpack_ushort, native_pack_ushort }, { 'i', sizeof(int), INT_ALIGN, native_unpack_int, native_pack_int }, { 'I', sizeof(int), INT_ALIGN, native_unpack_uint, native_pack_uint }, { 'l', sizeof(long), LONG_ALIGN, native_unpack_long, native_pack_long }, { 'L', sizeof(long), LONG_ALIGN, native_unpack_ulong, native_pack_ulong }, { 'n', sizeof(size_t), SIZE_T_ALIGN, native_unpack_ssize_t, native_pack_ssize_t }, { 'N', sizeof(size_t), SIZE_T_ALIGN, native_unpack_size_t, native_pack_size_t }, { 'q', sizeof(long long), LONG_LONG_ALIGN, native_unpack_longlong, native_pack_longlong }, { 'Q', sizeof(long long), LONG_LONG_ALIGN, native_unpack_ulonglong,native_pack_ulonglong }, { '?', sizeof(bool), BOOL_ALIGN, native_unpack_bool, native_pack_bool }, { 'e', sizeof(short), SHORT_ALIGN, native_unpack_halffloat, native_pack_halffloat }, { 'f', sizeof(float), FLOAT_ALIGN, native_unpack_float, native_pack_float }, { 'd', sizeof(double), DOUBLE_ALIGN, native_unpack_double, native_pack_double }, { 'P', sizeof(void *), VOID_P_ALIGN, native_unpack_void_p, native_pack_void_p }, { 0 } }; /* Big-endian routines. *****************************************************/ static uc_value_t * be_unpack_int(uc_vm_t *vm, const char *p, const formatdef_t *f) { const unsigned char *bytes = (const unsigned char *)p; ssize_t i = f->size; long x = 0; do { x = (x<<8) | *bytes++; } while (--i > 0); /* Extend the sign bit. 
*/ if ((ssize_t)sizeof(long) > f->size) x |= -(x & (1L << ((8 * f->size) - 1))); return ucv_int64_new(x); } static uc_value_t * be_unpack_uint(uc_vm_t *vm, const char *p, const formatdef_t *f) { const unsigned char *bytes = (const unsigned char *)p; ssize_t i = f->size; unsigned long x = 0; do { x = (x<<8) | *bytes++; } while (--i > 0); return ucv_uint64_new(x); } static uc_value_t * be_unpack_longlong(uc_vm_t *vm, const char *p, const formatdef_t *f) { const unsigned char *bytes = (const unsigned char *)p; ssize_t i = f->size; long long x = 0; do { x = (x<<8) | *bytes++; } while (--i > 0); /* Extend the sign bit. */ if ((ssize_t)sizeof(long long) > f->size) x |= -(x & ((long long)1 << ((8 * f->size) - 1))); return ucv_int64_new(x); } static uc_value_t * be_unpack_ulonglong(uc_vm_t *vm, const char *p, const formatdef_t *f) { const unsigned char *bytes = (const unsigned char *)p; unsigned long long x = 0; ssize_t i = f->size; do { x = (x<<8) | *bytes++; } while (--i > 0); return ucv_uint64_new(x); } static uc_value_t * be_unpack_halffloat(uc_vm_t *vm, const char *p, const formatdef_t *f) { return ucv_double_new(double_unpack16(p, false)); } static uc_value_t * be_unpack_float(uc_vm_t *vm, const char *p, const formatdef_t *f) { return ucv_double_new(double_unpack32(p, false)); } static uc_value_t * be_unpack_double(uc_vm_t *vm, const char *p, const formatdef_t *f) { return ucv_double_new(double_unpack64(p, false)); } static uc_value_t * be_unpack_bool(uc_vm_t *vm, const char *p, const formatdef_t *f) { return ucv_boolean_new(*p != 0); } static bool be_pack_int(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { unsigned char *q = (unsigned char *)p; ssize_t i = 0; long x = 0; if (!ucv_as_long(vm, v, &x)) return false; i = f->size; if (i != sizeof(long)) { if ((i == 2) && (x < -32768 || x > 32767)) return range_exception(vm, f, false); #if UINT_MAX < ULONG_MAX else if ((i == 4) && (x < -2147483648L || x > 2147483647L)) return range_exception(vm, f, false); 
#endif } do { q[--i] = (unsigned char)(x & 0xffL); x >>= 8; } while (i > 0); return true; } static bool be_pack_uint(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { unsigned char *q = (unsigned char *)p; unsigned long x = 0; ssize_t i = 0; if (!ucv_as_ulong(vm, v, &x)) return false; i = f->size; if (i != sizeof(long)) { unsigned long maxint = 1; maxint <<= (unsigned long)(i * 8); if (x >= maxint) return range_exception(vm, f, true); } do { q[--i] = (unsigned char)(x & 0xffUL); x >>= 8; } while (i > 0); return true; } static bool be_pack_longlong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { unsigned char *q = (unsigned char *)p; long long x = 0; ssize_t i = 0; if (!ucv_as_longlong(vm, v, &x)) return false; i = f->size; do { q[--i] = (unsigned char)(x & 0xffL); x >>= 8; } while (i > 0); return true; } static bool be_pack_ulonglong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { unsigned char *q = (unsigned char *)p; unsigned long long x = 0; ssize_t i = 0; if (!ucv_as_ulonglong(vm, v, &x)) return false; i = f->size; do { q[--i] = (unsigned char)(x & 0xffUL); x >>= 8; } while (i > 0); return true; } static bool be_pack_halffloat(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { double x = 0.0; if (!ucv_as_double(vm, v, &x)) return false; return double_pack16(x, p, false); } static bool be_pack_float(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { double x = 0.0; if (!ucv_as_double(vm, v, &x)) return false; if (!double_pack32(x, p, 0)) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); return false; } return true; } static bool be_pack_double(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { double x = 0.0; if (!ucv_as_double(vm, v, &x)) return false; if (!double_pack64(x, p, 0)) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); return false; } return true; } static bool be_pack_bool(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { *p = 
(char)ucv_is_truish(v); return true; } static formatdef_t big_endian_table[] = { { 'x', 1, 0, NULL, NULL }, { 'b', 1, 0, native_unpack_byte, native_pack_byte }, { 'B', 1, 0, native_unpack_ubyte, native_pack_ubyte }, { 'c', 1, 0, native_unpack_char, native_pack_char }, { '*', 1, 0, NULL, NULL }, { 's', 1, 0, NULL, NULL }, { 'p', 1, 0, NULL, NULL }, { 'h', 2, 0, be_unpack_int, be_pack_int }, { 'H', 2, 0, be_unpack_uint, be_pack_uint }, { 'i', 4, 0, be_unpack_int, be_pack_int }, { 'I', 4, 0, be_unpack_uint, be_pack_uint }, { 'l', 4, 0, be_unpack_int, be_pack_int }, { 'L', 4, 0, be_unpack_uint, be_pack_uint }, { 'q', 8, 0, be_unpack_longlong, be_pack_longlong }, { 'Q', 8, 0, be_unpack_ulonglong, be_pack_ulonglong }, { '?', 1, 0, be_unpack_bool, be_pack_bool }, { 'e', 2, 0, be_unpack_halffloat, be_pack_halffloat }, { 'f', 4, 0, be_unpack_float, be_pack_float }, { 'd', 8, 0, be_unpack_double, be_pack_double }, { 0 } }; /* Little-endian routines. *****************************************************/ static uc_value_t * le_unpack_int(uc_vm_t *vm, const char *p, const formatdef_t *f) { const unsigned char *bytes = (const unsigned char *)p; ssize_t i = f->size; long x = 0; do { x = (x<<8) | bytes[--i]; } while (i > 0); /* Extend the sign bit. */ if ((ssize_t)sizeof(long) > f->size) x |= -(x & (1L << ((8 * f->size) - 1))); return ucv_int64_new(x); } static uc_value_t * le_unpack_uint(uc_vm_t *vm, const char *p, const formatdef_t *f) { const unsigned char *bytes = (const unsigned char *)p; ssize_t i = f->size; unsigned long x = 0; do { x = (x<<8) | bytes[--i]; } while (i > 0); return ucv_uint64_new(x); } static uc_value_t * le_unpack_longlong(uc_vm_t *vm, const char *p, const formatdef_t *f) { const unsigned char *bytes = (const unsigned char *)p; ssize_t i = f->size; long long x = 0; do { x = (x<<8) | bytes[--i]; } while (i > 0); /* Extend the sign bit. 
*/ if ((ssize_t)sizeof(long long) > f->size) x |= -(x & ((long long)1 << ((8 * f->size) - 1))); return ucv_int64_new(x); } static uc_value_t * le_unpack_ulonglong(uc_vm_t *vm, const char *p, const formatdef_t *f) { const unsigned char *bytes = (const unsigned char *)p; unsigned long long x = 0; ssize_t i = f->size; do { x = (x<<8) | bytes[--i]; } while (i > 0); return ucv_uint64_new(x); } static uc_value_t * le_unpack_halffloat(uc_vm_t *vm, const char *p, const formatdef_t *f) { return ucv_double_new(double_unpack16(p, true)); } static uc_value_t * le_unpack_float(uc_vm_t *vm, const char *p, const formatdef_t *f) { return ucv_double_new(double_unpack32(p, true)); } static uc_value_t * le_unpack_double(uc_vm_t *vm, const char *p, const formatdef_t *f) { return ucv_double_new(double_unpack64(p, true)); } static bool le_pack_int(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { unsigned char *q = (unsigned char *)p; ssize_t i = 0; long x = 0; if (!ucv_as_long(vm, v, &x)) return false; i = f->size; if (i != sizeof(long)) { if ((i == 2) && (x < -32768 || x > 32767)) return range_exception(vm, f, false); #if UINT_MAX < ULONG_MAX else if ((i == 4) && (x < -2147483648L || x > 2147483647L)) return range_exception(vm, f, false); #endif } do { *q++ = (unsigned char)(x & 0xffL); x >>= 8; } while (--i > 0); return true; } static bool le_pack_uint(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { unsigned char *q = (unsigned char *)p; unsigned long x = 0; ssize_t i = 0; if (!ucv_as_ulong(vm, v, &x)) return false; i = f->size; if (i != sizeof(long)) { unsigned long maxint = 1; maxint <<= (unsigned long)(i * 8); if (x >= maxint) return range_exception(vm, f, true); } do { *q++ = (unsigned char)(x & 0xffUL); x >>= 8; } while (--i > 0); return true; } static bool le_pack_longlong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { unsigned char *q = (unsigned char *)p; long long x = 0; ssize_t i = 0; if (!ucv_as_longlong(vm, v, &x)) return false; i = 
f->size; do { *q++ = (unsigned char)(x & 0xffL); x >>= 8; } while (--i > 0); return true; } static bool le_pack_ulonglong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { unsigned char *q = (unsigned char *)p; unsigned long long x = 0; ssize_t i = 0; if (!ucv_as_ulonglong(vm, v, &x)) return false; i = f->size; do { *q++ = (unsigned char)(x & 0xffUL); x >>= 8; } while (--i > 0); return true; } static bool le_pack_halffloat(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { double x = 0.0; if (!ucv_as_double(vm, v, &x)) return false; return double_pack16(x, p, true); } static bool le_pack_float(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { double x = 0.0; if (!ucv_as_double(vm, v, &x)) return false; if (!double_pack32(x, p, 1)) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); return false; } return true; } static bool le_pack_double(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) { double x = 0.0; if (!ucv_as_double(vm, v, &x)) return false; if (!double_pack64(x, p, 1)) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); return false; } return true; } static formatdef_t little_endian_table[] = { { 'x', 1, 0, NULL, NULL }, { 'b', 1, 0, native_unpack_byte, native_pack_byte }, { 'B', 1, 0, native_unpack_ubyte, native_pack_ubyte }, { 'c', 1, 0, native_unpack_char, native_pack_char }, { '*', 1, 0, NULL, NULL }, { 's', 1, 0, NULL, NULL }, { 'p', 1, 0, NULL, NULL }, { 'h', 2, 0, le_unpack_int, le_pack_int }, { 'H', 2, 0, le_unpack_uint, le_pack_uint }, { 'i', 4, 0, le_unpack_int, le_pack_int }, { 'I', 4, 0, le_unpack_uint, le_pack_uint }, { 'l', 4, 0, le_unpack_int, le_pack_int }, { 'L', 4, 0, le_unpack_uint, le_pack_uint }, { 'q', 8, 0, le_unpack_longlong, le_pack_longlong }, { 'Q', 8, 0, le_unpack_ulonglong, le_pack_ulonglong }, { '?', 1, 0, be_unpack_bool, be_pack_bool }, { 'e', 2, 0, le_unpack_halffloat, le_pack_halffloat }, { 'f', 4, 0, le_unpack_float, le_pack_float }, { 'd', 8, 0, 
le_unpack_double, le_pack_double }, { 0 } }; static const formatdef_t * select_format_table(const char **pfmt) { const char *fmt = (*pfmt)++; /* May be backed out of later */ switch (*fmt) { case '<': return little_endian_table; case '>': case '!': /* Network byte order is big-endian */ return big_endian_table; case '=': /* Host byte order -- different from native in alignment! */ #if __BYTE_ORDER == __LITTLE_ENDIAN return little_endian_table; #else return big_endian_table; #endif default: --*pfmt; /* Back out of pointer increment */ /* Fall through */ case '@': return native_endian_table; } } /* Get the table entry for a format code */ static const formatdef_t * lookup_table_entry(uc_vm_t *vm, int c, const formatdef_t *table) { for (; table->format != '\0'; table++) { if (table->format == c) { return table; } } uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Unrecognized character '%c' in struct format", c); return NULL; } /* Align a size according to a format code. Return -1 on overflow. */ static ssize_t align_for_entry(ssize_t size, const formatdef_t *e) { ssize_t extra; if (e->alignment && size > 0) { extra = (e->alignment - 1) - (size - 1) % (e->alignment); if (extra > SSIZE_MAX - size) return -1; size += extra; } return size; } static void optimize_functions(void) { /* Check endian and swap in faster functions */ const formatdef_t *native = native_endian_table; formatdef_t *other, *ptr; #if __BYTE_ORDER == __LITTLE_ENDIAN other = little_endian_table; #else other = big_endian_table; #endif /* Scan through the native table, find a matching entry in the endian table and swap in the native implementations whenever possible (64-bit platforms may not have "standard" sizes) */ while (native->format != '\0' && other->format != '\0') { ptr = other; while (ptr->format != '\0') { if (ptr->format == native->format) { /* Match faster when formats are listed in the same order */ if (ptr == other) other++; /* Only use the trick if the size matches */ if (ptr->size != 
native->size) break; /* Skip float and double, could be "unknown" float format */ if (ptr->format == 'd' || ptr->format == 'f') break; /* Skip bool, semantics are different for standard size */ if (ptr->format == '?') break; ptr->pack = native->pack; ptr->unpack = native->unpack; break; } ptr++; } native++; } } static formatstate_t * parse_format(uc_vm_t *vm, uc_value_t *fmtval) { ssize_t size, num, itemsize; const formatdef_t *e, *f; const char *fmt, *s; formatstate_t *state; formatcode_t *codes; size_t ncodes; char c; if (ucv_type(fmtval) != UC_STRING) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Format value not a string"); return NULL; } fmt = ucv_string_get(fmtval); if (strlen(fmt) != ucv_string_length(fmtval)) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Format string contains embedded null character"); return NULL; } f = select_format_table(&fmt); s = fmt; size = 0; ncodes = 0; while ((c = *s++) != '\0') { if (isspace(c)) continue; if ('0' <= c && c <= '9') { num = c - '0'; while ('0' <= (c = *s++) && c <= '9') { /* overflow-safe version of if (num*10 + (c - '0') > SSIZE_MAX) { ... } */ if (num >= SSIZE_MAX / 10 && ( num > SSIZE_MAX / 10 || (c - '0') > SSIZE_MAX % 10)) goto overflow; num = num*10 + (c - '0'); } if (c == '\0') { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Format string contains repeat count given without format specifier"); return NULL; } } else num = 1; e = lookup_table_entry(vm, c, f); if (e == NULL) return NULL; switch (c) { case '*': /* fall through */ case 's': case 'p': ncodes++; break; case 'x': break; default: if (num) ncodes++; break; } itemsize = e->size; size = align_for_entry(size, e); if (size == -1) goto overflow; /* if (size + num * itemsize > SSIZE_MAX) { ... } */ if (num > (SSIZE_MAX - size) / itemsize) goto overflow; size += (c != '*') ? 
num * itemsize : 0; } /* check for overflow */ if ((ncodes + 1) > ((size_t)SSIZE_MAX / sizeof(formatcode_t))) { uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "Out of memory"); return NULL; } state = xalloc(sizeof(*state) + ncodes * sizeof(formatcode_t)); state->size = size; state->ncodes = ncodes; codes = state->codes; s = fmt; size = 0; while ((c = *s++) != '\0') { if (isspace(c)) continue; if ('0' <= c && c <= '9') { num = c - '0'; while ('0' <= (c = *s++) && c <= '9') num = num*10 + (c - '0'); } else if (c == '*') num = -1; else num = 1; e = lookup_table_entry(vm, c, f); if (e == NULL) continue; size = align_for_entry(size, e); if (c == '*' || c == 's' || c == 'p') { codes->offset = size; codes->size = num; codes->fmtdef = e; codes->repeat = 1; codes++; size += (c != '*') ? num : 0; } else if (c == 'x') { size += num; } else if (num) { codes->offset = size; codes->size = e->size; codes->fmtdef = e; codes->repeat = num; codes++; size += e->size * num; } } return state; overflow: uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "Total struct size too long"); return NULL; } static uc_value_t * uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff) { size_t ncode, arg, off; formatcode_t *code; uc_string_t *buf; ssize_t size, n; const void *p; for (ncode = 0, code = &state->codes[0], arg = argoff, off = 0; ncode < state->ncodes; code = &state->codes[++ncode]) { if (code->fmtdef->format == '*') { uc_value_t *v = uc_fn_arg(arg++); if (ucv_type(v) != UC_STRING) continue; n = ucv_string_length(v); if (code->size == -1 || code->size > n) off += n; else off += code->size; } else { arg += code->repeat; } } buf = xalloc(sizeof(*buf) + state->size + off + 1); buf->header.type = UC_STRING; buf->header.refcount = 1; buf->length = state->size + off; for (ncode = 0, code = &state->codes[0], off = 0; ncode < state->ncodes; code = &state->codes[++ncode]) { const formatdef_t *e = code->fmtdef; char *res = buf->str + code->offset + off; ssize_t j = code->repeat; 
while (j--) { uc_value_t *v = uc_fn_arg(argoff++); size = code->size; if (e->format == '*') { if (ucv_type(v) != UC_STRING) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument for '*' must be a string"); goto err; } n = ucv_string_length(v); p = ucv_string_get(v); if (size == -1 || n < size) size = n; else if (n > size) n = size; off += size; if (n > 0) memcpy(res, p, n); } else if (e->format == 's') { if (ucv_type(v) != UC_STRING) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument for 's' must be a string"); goto err; } n = ucv_string_length(v); p = ucv_string_get(v); if (n > size) n = size; if (n > 0) memcpy(res, p, n); } else if (e->format == 'p') { if (ucv_type(v) != UC_STRING) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument for 'p' must be a string"); goto err; } n = ucv_string_length(v); p = ucv_string_get(v); if (n > (size - 1)) n = size - 1; if (n > 0) memcpy(res + 1, p, n); if (n > 255) n = 255; *res = (unsigned char)n; } else { if (!e->pack(vm, res, v, e)) goto err; } res += size; } } return &buf->header; err: free(buf); return NULL; } static uc_value_t * uc_unpack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff) { uc_value_t *bufval = uc_fn_arg(argoff); uc_value_t *offset = uc_fn_arg(argoff + 1); const char *startfrom = NULL; ssize_t bufrem, size, n; uc_value_t *result; formatcode_t *code; size_t ncode, off; if (ucv_type(bufval) != UC_STRING) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Buffer value not a string"); return NULL; } startfrom = ucv_string_get(bufval); bufrem = ucv_string_length(bufval); if (offset) { if (ucv_type(offset) != UC_INTEGER) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Offset value not an integer"); return NULL; } n = (ssize_t)ucv_int64_get(offset); if (n < 0) n += bufrem; if (n < 0 || n >= bufrem) return NULL; startfrom += n; bufrem -= n; } result = ucv_array_new(vm); for (ncode = 0, code = &state->codes[0], off = 0; ncode < state->ncodes; code = &state->codes[++ncode]) { const formatdef_t *e = 
code->fmtdef; const char *res = startfrom + code->offset + off; ssize_t j = code->repeat; while (j--) { uc_value_t *v = NULL; size = code->size; if (e->format == '*') { if (size == -1 || size > bufrem) size = bufrem; off += size; } else if (size > bufrem) { goto fail; } if (e->format == 's' || e->format == '*') { v = ucv_string_new_length(res, size); } else if (e->format == 'p') { n = *(unsigned char *)res; if (n >= size) n = (size > 0 ? size - 1 : 0); v = ucv_string_new_length(res + 1, n); } else { v = e->unpack(vm, res, e); } if (v == NULL) goto fail; ucv_array_push(result, v); res += size; bufrem -= size; } } return result; fail: ucv_put(result); return NULL; } /** * Pack given values according to specified format. * * The `pack()` function creates a byte string containing the argument values * packed according to the given format string. * * Returns the packed string. * * Raises a runtime exception if a given argument value does not match the * required type of the corresponding format string directive or if and invalid * format string is provided. * * @function module:struct#pack * * @param {string} format * The format string. * * @param {...*} values * Variable number of values to pack. * * @returns {string} * * @example * // Pack the values 1, 2, 3 as three consecutive unsigned int values * // in network byte order. * const data = pack('!III', 1, 2, 3); */ static uc_value_t * uc_pack(uc_vm_t *vm, size_t nargs) { uc_value_t *fmtval = uc_fn_arg(0); uc_value_t *res = NULL; formatstate_t *state; state = parse_format(vm, fmtval); if (!state) return NULL; res = uc_pack_common(vm, nargs, state, 1); free(state); return res; } /** * Unpack given byte string according to specified format. * * The `unpack()` function interpretes a byte string according to the given * format string and returns the resulting values. If the optional offset * argument is given, unpacking starts from this byte position within the input. 
* If not specified, the start offset defaults to `0`, the start of the given * input string. * * Returns an array of unpacked values. * * Raises a runtime exception if the format string is invalid or if an invalid * input string or offset value is given. * * @function module:struct#unpack * * @param {string} format * The format string. * * @param {string} input * The input string to unpack. * * @param {number} [offset=0] * The offset within the input string to start unpacking from. * * @returns {array} * * @example * // Unpack three consecutive unsigned int values in network byte order. * const numbers = * unpack('!III', '\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03'); * print(numbers, "\n"); // [ 1, 2, 3 ] */ static uc_value_t * uc_unpack(uc_vm_t *vm, size_t nargs) { uc_value_t *fmtval = uc_fn_arg(0); uc_value_t *res = NULL; formatstate_t *state; state = parse_format(vm, fmtval); if (!state) return NULL; res = uc_unpack_common(vm, nargs, state, 1); free(state); return res; } /** * Represents a struct instance created by `new()`. * * @class module:struct.instance * @hideconstructor * * @see {@link module:struct#new|new()} * * @example * * const fmt = struct.new(…); * * fmt.pack(…); * * const values = fmt.unpack(…); */ /** * Precompile format string. * * The `new()` function precompiles the given format string argument and returns * a `struct` object instance useful for packing and unpacking multiple items * without having to recompute the internal format each time. * * Returns an precompiled struct format instance. * * Raises a runtime exception if the format string is invalid. * * @function module:struct#new * * @param {string} format * The format string. * * @returns {module:struct.instance} * * @example * // Create a format of three consecutive unsigned int values in network byte order. 
* const fmt = struct.new('!III'); * const buf = fmt.pack(1, 2, 3); // "\x00\x00\x00\x01…" * print(fmt.unpack(buf), "\n"); // [ 1, 2, 3 ] */ static uc_value_t * uc_struct_new(uc_vm_t *vm, size_t nargs) { uc_value_t *fmtval = uc_fn_arg(0); formatstate_t *state; state = parse_format(vm, fmtval); if (!state) return NULL; return uc_resource_new(struct_type, state); } static void uc_struct_gc(void *ud) { formatstate_t *state = ud; free(state); } /** * Pack given values. * * The `pack()` function creates a byte string containing the argument values * packed according to the given format instance. * * Returns the packed string. * * Raises a runtime exception if a given argument value does not match the * required type of the corresponding format string directive. * * @function module:struct.instance#pack * * @param {...*} values * Variable number of values to pack. * * @returns {string} * * @example * const fmt = struct.new(…); * const data = fmt.pack(…); */ static uc_value_t * uc_struct_pack(uc_vm_t *vm, size_t nargs) { formatstate_t **state = uc_fn_this("struct"); if (!state || !*state) return NULL; return uc_pack_common(vm, nargs, *state, 0); } /** * Unpack given byte string. * * The `unpack()` function interpretes a byte string according to the given * format instance and returns the resulting values. If the optional offset * argument is given, unpacking starts from this byte position within the input. * If not specified, the start offset defaults to `0`, the start of the given * input string. * * Returns an array of unpacked values. * * Raises a runtime exception if an invalid input string or offset value is * given. * * @function module:struct.instance#unpack * * @param {string} input * The input string to unpack. * * @param {number} [offset=0] * The offset within the input string to start unpacking from. 
* * @returns {array} * * @example * const fmt = struct.new(…); * const values = fmt.unpack(…); */ static uc_value_t * uc_struct_unpack(uc_vm_t *vm, size_t nargs) { formatstate_t **state = uc_fn_this("struct"); if (!state || !*state) return NULL; return uc_unpack_common(vm, nargs, *state, 0); } static const uc_function_list_t struct_inst_fns[] = { { "pack", uc_struct_pack }, { "unpack", uc_struct_unpack } }; static const uc_function_list_t struct_fns[] = { { "pack", uc_pack }, { "unpack", uc_unpack }, { "new", uc_struct_new } }; void uc_module_init(uc_vm_t *vm, uc_value_t *scope) { optimize_functions(); uc_function_list_register(scope, struct_fns); struct_type = uc_type_declare(vm, "struct", struct_inst_fns, uc_struct_gc); }