struct: implement `*` format, fix invalid memory accesses

Implement a new `*` format which acts like `s` on unpack but accepts input records which are shorter than the specified length, e.g. the following call will yield "abc" while an equivalent "10s" format would fail: unpack("2*", "abc") // [ "ab" ] unpack("10*", "abc") // [ "abc" ] unpack("10s", "abc") // null The `*` format is primarily useful to extract the remainder of a variable length record without having to encode the specific length of the record directly into the format string. When packing records, the `*` format takes at most as many bytes as specified in the format string repeat count. If the input string is shorter than the given repeat count, only as many bytes as present in the input string are taken. A bare `*` without any repeat count will take all bytes from the given input string: pack("2*", "abc") // "ab" pack("10*", "abc") // "abc" pack("*", "abc") // "abc" pack("10s", "abc") // "abc\0\0\0\0\0\0\0" Additionally prevent invalid memory accesses when unpacking a buffer shorter than the length expected by the format string. Signed-off-by: Jo-Philipp Wich <jo@mein.io>
author: Jo-Philipp Wich <jo@mein.io> 2022-01-24 18:14:36 +0100
committer: Jo-Philipp Wich <jo@mein.io> 2022-01-24 22:10:17 +0100
commit: a1b3c5dfbe0aaf46d3d71656e65d72526bfa5543 (patch)
tree: 22a01f1f8350ae65eff996cd504fdf5e11fb5105 /lib
parent: b2fac62699687368559af06487338f282e30c723 (diff)
1 files changed, 89 insertions, 30 deletions
diff --git a/lib/struct.c b/lib/struct.c
index dad73a6..5d301db 100644
--- a/lib/struct.c
+++ b/lib/struct.c
@@ -1352,6 +1352,7 @@ static const formatdef_t native_endian_table[] = {
 	{ 'b', sizeof(char), 0, native_unpack_byte, native_pack_byte },
 	{ 'B', sizeof(char), 0, native_unpack_ubyte, native_pack_ubyte },
 	{ 'c', sizeof(char), 0, native_unpack_char, native_pack_char },
+	{ '*', sizeof(char), 0, NULL, NULL },
 	{ 's', sizeof(char), 0, NULL, NULL },
 	{ 'p', sizeof(char), 0, NULL, NULL },
 	{ 'h', sizeof(short), SHORT_ALIGN, native_unpack_short, native_pack_short },
@@ -1617,6 +1618,7 @@ static formatdef_t big_endian_table[] = {
 	{ 'b', 1, 0, native_unpack_byte, native_pack_byte },
 	{ 'B', 1, 0, native_unpack_ubyte, native_pack_ubyte },
 	{ 'c', 1, 0, native_unpack_char, native_pack_char },
+	{ '*', 1, 0, NULL, NULL },
 	{ 's', 1, 0, NULL, NULL },
 	{ 'p', 1, 0, NULL, NULL },
 	{ 'h', 2, 0, be_unpack_int, be_pack_int },
@@ -1866,6 +1868,7 @@ static formatdef_t little_endian_table[] = {
 	{ 'b', 1, 0, native_unpack_byte, native_pack_byte },
 	{ 'B', 1, 0, native_unpack_ubyte, native_pack_ubyte },
 	{ 'c', 1, 0, native_unpack_char, native_pack_char },
+	{ '*', 1, 0, NULL, NULL },
 	{ 's', 1, 0, NULL, NULL },
 	{ 'p', 1, 0, NULL, NULL },
 	{ 'h', 2, 0, le_unpack_int, le_pack_int },
@@ -2074,7 +2077,8 @@ parse_format(uc_vm_t *vm, uc_value_t *fmtval)
 			return NULL;
 
 		switch (c) {
-		case 's': /* fall through */
+		case '*': /* fall through */
+		case 's':
 		case 'p':
 			len++;
 			ncodes++;
@@ -2102,7 +2106,7 @@ parse_format(uc_vm_t *vm, uc_value_t *fmtval)
 		if (num > (SSIZE_MAX - size) / itemsize)
 			goto overflow;
 
-		size += num * itemsize;
+		size += (c != '*') ? num * itemsize : 0;
 	}
 
 	/* check for overflow */
@@ -2131,7 +2135,10 @@ parse_format(uc_vm_t *vm, uc_value_t *fmtval)
 
 			while ('0' <= (c = *s++) && c <= '9')
 				num = num*10 + (c - '0');
+
 		}
+		else if (c == '*')
+			num = -1;
 		else
 			num = 1;
 
@@ -2142,13 +2149,13 @@ parse_format(uc_vm_t *vm, uc_value_t *fmtval)
 
 		size = align_for_entry(size, e);
 
-		if (c == 's' || c == 'p') {
+		if (c == '*' || c == 's' || c == 'p') {
 			codes->offset = size;
 			codes->size = num;
 			codes->fmtdef = e;
 			codes->repeat = 1;
 			codes++;
-			size += num;
+			size += (c != '*') ? num : 0;
 		}
 		else if (c == 'x') {
 			size += num;
@@ -2176,28 +2183,68 @@ static uc_value_t *
 uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 {
 	formatcode_t *code;
+	size_t ncode, off;
 	uc_string_t *buf;
-	size_t ncode;
+	ssize_t size, n;
+	const void *p;
+
+	for (ncode = 0, code = &state->codes[0], off = 0;
+	     ncode < state->ncodes;
+	     code = &state->codes[++ncode]) {
+		if (code->fmtdef->format == '*') {
+			uc_value_t *v = uc_fn_arg(argoff + ncode);
+
+			if (ucv_type(v) != UC_STRING)
+				continue;
+
+			n = ucv_string_length(v);
+
+			if (code->size == -1 || code->size > n)
+				off += n;
+			else
+				off += code->size;
+		}
+	}
 
-	buf = xalloc(sizeof(*buf) + state->size + 1);
+	buf = xalloc(sizeof(*buf) + state->size + off + 1);
 	buf->header.type = UC_STRING;
 	buf->header.refcount = 1;
-	buf->length = state->size;
+	buf->length = state->size + off;
 
-	for (ncode = 0, code = &state->codes[0];
+	for (ncode = 0, code = &state->codes[0], off = 0;
 	     ncode < state->ncodes;
 	     code = &state->codes[++ncode]) {
 		const formatdef_t *e = code->fmtdef;
-		char *res = buf->str + code->offset;
+		char *res = buf->str + code->offset + off;
 		ssize_t j = code->repeat;
 
 		while (j--) {
 			uc_value_t *v = uc_fn_arg(argoff++);
 
-			if (e->format == 's') {
-				ssize_t n;
-				const void *p;
+			size = code->size;
+
+			if (e->format == '*') {
+				if (ucv_type(v) != UC_STRING) {
+					uc_vm_raise_exception(vm, EXCEPTION_TYPE,
+						"Argument for '*' must be a string");
+
+					goto err;
+				}
+
+				n = ucv_string_length(v);
+				p = ucv_string_get(v);
+
+				if (size == -1 || n < size)
+					size = n;
+				else if (n > size)
+					n = size;
 
+				off += size;
+
+				if (n > 0)
+					memcpy(res, p, n);
+			}
+			else if (e->format == 's') {
 				if (ucv_type(v) != UC_STRING) {
 					uc_vm_raise_exception(vm, EXCEPTION_TYPE,
 						"Argument for 's' must be a string");
@@ -2208,19 +2255,16 @@ uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 				n = ucv_string_length(v);
 				p = ucv_string_get(v);
 
-				if (n > code->size)
-					n = code->size;
+				if (n > size)
+					n = size;
 
 				if (n > 0)
 					memcpy(res, p, n);
 			}
 			else if (e->format == 'p') {
-				ssize_t n;
-				const void *p;
-
 				if (ucv_type(v) != UC_STRING) {
 					uc_vm_raise_exception(vm, EXCEPTION_TYPE,
-						"Argument for 's' must be a string");
+						"Argument for 'p' must be a string");
 
 					goto err;
 				}
@@ -2228,8 +2272,8 @@ uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 				n = ucv_string_length(v);
 				p = ucv_string_get(v);
 
-				if (n > (code->size - 1))
-					n = code->size - 1;
+				if (n > (size - 1))
+					n = size - 1;
 
 				if (n > 0)
 					memcpy(res + 1, p, n);
@@ -2244,7 +2288,7 @@ uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 					goto err;
 			}
 
-			res += code->size;
+			res += size;
 		}
 	}
 
@@ -2261,9 +2305,10 @@ uc_unpack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 {
 	uc_value_t *bufval = uc_fn_arg(argoff);
 	const char *startfrom = NULL;
+	ssize_t bufrem, size, n;
 	uc_value_t *result;
 	formatcode_t *code;
-	size_t ncode = 0;
+	size_t ncode, off;
 
 	if (ucv_type(bufval) != UC_STRING) {
 		uc_vm_raise_exception(vm, EXCEPTION_TYPE,
@@ -2273,26 +2318,39 @@ uc_unpack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 	}
 
 	startfrom = ucv_string_get(bufval);
+	bufrem = ucv_string_length(bufval);
 	result = ucv_array_new(vm);
 
-	for (ncode = 0, code = &state->codes[0];
+	for (ncode = 0, code = &state->codes[0], off = 0;
 	     ncode < state->ncodes;
 	     code = &state->codes[++ncode]) {
 		const formatdef_t *e = code->fmtdef;
-		const char *res = startfrom + code->offset;
+		const char *res = startfrom + code->offset + off;
 		ssize_t j = code->repeat;
 
 		while (j--) {
 			uc_value_t *v = NULL;
 
-			if (e->format == 's') {
-				v = ucv_string_new_length(res, code->size);
+			size = code->size;
+
+			if (e->format == '*') {
+				if (size == -1 || size > bufrem)
+					size = bufrem;
+
+				off += size;
+			}
+			else if (size > bufrem) {
+				goto fail;
+			}
+
+			if (e->format == 's' || e->format == '*') {
+				v = ucv_string_new_length(res, size);
 			}
 			else if (e->format == 'p') {
-				ssize_t n = *(unsigned char *)res;
+				n = *(unsigned char *)res;
 
-				if (n >= code->size)
-					n = code->size - 1;
+				if (n >= size)
+					n = (size > 0 ? size - 1 : 0);
 
 				v = ucv_string_new_length(res + 1, n);
 			}
@@ -2305,7 +2363,8 @@ uc_unpack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 
 			ucv_array_push(result, v);
 
-			res += code->size;
+			res += size;
+			bufrem -= size;
 		}
 	}
author	Jo-Philipp Wich <jo@mein.io>	2022-01-24 18:14:36 +0100
committer	Jo-Philipp Wich <jo@mein.io>	2022-01-24 22:10:17 +0100
commit	a1b3c5dfbe0aaf46d3d71656e65d72526bfa5543 (patch)
tree	22a01f1f8350ae65eff996cd504fdf5e11fb5105 /lib
parent	b2fac62699687368559af06487338f282e30c723 (diff)