From 8b0318f7fabe06736c4437579054f6040466f6d3 Mon Sep 17 00:00:00 2001 From: Thibaut VARÈNE Date: Tue, 4 Jun 2024 19:29:09 +0200 Subject: lib: introduce zlib library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new "zlib" library which provides simplified bindings for zlib inflate/deflate operations. Signed-off-by: Thibaut VARÈNE --- lib/zlib.c | 493 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 493 insertions(+) create mode 100644 lib/zlib.c (limited to 'lib') diff --git a/lib/zlib.c b/lib/zlib.c new file mode 100644 index 0000000..8190251 --- /dev/null +++ b/lib/zlib.c @@ -0,0 +1,493 @@ +/* + * Copyright (C) 2024 Thibaut VARÈNE + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/** + * # Zlib bindings + * + * The `zlib` module provides single-call-oriented functions for interacting with zlib data. + * + * @module zlib + */ + +#include +#include +#include +#include +#include + +#include "ucode/module.h" +#include "ucode/platform.h" + +// https://zlib.net/zlib_how.html + +/* + * CHUNK is simply the buffer size for feeding data to and pulling data from + * the zlib routines. Larger buffer sizes would be more efficient, especially + * for inflate(). If the memory is available, buffers sizes on the order of + * 128K or 256K bytes should be used. + */ +#define CHUNK 16384 + + +/* zlib init error message */ +static const char * ziniterr(int ret) +{ + const char * msg; + + switch (ret) { + case Z_ERRNO: + msg = strerror(errno); + break; + case Z_STREAM_ERROR: // can only happen for deflateInit2() by construction + msg = "invalid compression level"; + break; + case Z_MEM_ERROR: + msg = "out of memory"; + break; + case Z_VERSION_ERROR: + msg = "zlib version mismatch!"; + break; + default: + msg = "unknown error"; + break; + } + + return msg; +} + +static uc_stringbuf_t * +uc_zlib_def_object(uc_vm_t *vm, uc_value_t *obj, z_stream *strm) +{ + int ret; + bool eof = false; + uc_value_t *rfn, *rbuf; + uc_stringbuf_t *buf = NULL; + + rfn = ucv_property_get(obj, "read"); + + if (!ucv_is_callable(rfn)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Input object does not implement read() method"); + return NULL; + } + + buf = ucv_stringbuf_new(); + + do { + rbuf = NULL; + uc_vm_stack_push(vm, ucv_get(obj)); + uc_vm_stack_push(vm, ucv_get(rfn)); + uc_vm_stack_push(vm, ucv_int64_new(CHUNK)); + + if (uc_vm_call(vm, true, 1) != EXCEPTION_NONE) + goto fail; + + rbuf = uc_vm_stack_pop(vm); // read output chunk + + /* we only accept strings */ + if (rbuf != NULL && ucv_type(rbuf) != UC_STRING) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Input object read() method returned non-string value"); + goto fail; + } + + /* check EOF */ + eof = (rbuf == NULL || ucv_string_length(rbuf) == 0); + + strm->next_in = (unsigned char *)ucv_string_get(rbuf); + strm->avail_in = ucv_string_length(rbuf); + + /* run deflate() on input until output buffer not full */ + do { + // enlarge buf by CHUNK amount as needed + printbuf_memset(buf, printbuf_length(buf) + CHUNK - 1, 0, 1); + buf->bpos -= CHUNK; + + strm->avail_out = CHUNK; + strm->next_out = (unsigned char *)(buf->buf + buf->bpos);; + + ret = deflate(strm, eof ? Z_FINISH : Z_NO_FLUSH); // no bad return value here + assert(ret != Z_STREAM_ERROR); // state never clobbered (would be mem corruption) + (void)ret; // XXX make annoying compiler that ignores assert() happy + + // update bpos past data written by deflate() + buf->bpos += CHUNK - strm->avail_out; + } while (strm->avail_out == 0); + assert(strm->avail_in == 0); // all input will be used + + ucv_put(rbuf); // release rbuf + } while (!eof); // finish compression if all of source has been read in + assert(ret == Z_STREAM_END); // stream will be complete + + return buf; + +fail: + ucv_put(rbuf); + printbuf_free(buf); + return NULL; +} + +static uc_stringbuf_t * +uc_zlib_def_string(uc_vm_t *vm, uc_value_t *str, z_stream *strm) +{ + int ret; + uc_stringbuf_t *buf = NULL; + + buf = ucv_stringbuf_new(); + + strm->next_in = (unsigned char *)ucv_string_get(str); + strm->avail_in = ucv_string_length(str); + + do { + printbuf_memset(buf, printbuf_length(buf) + CHUNK - 1, 0, 1); + buf->bpos -= CHUNK; + + strm->avail_out = CHUNK; + strm->next_out = (unsigned char *)(buf->buf + buf->bpos); + + ret = deflate(strm, Z_FINISH); + assert(ret != Z_STREAM_ERROR); + + buf->bpos += CHUNK - strm->avail_out; + } while (ret != Z_STREAM_END); + assert(strm->avail_in == 0); + + return buf; +} + +/** + * Compresses data in Zlib or gzip format. + * + * If the input argument is a plain string, it is directly compressed. + * + * If an array, object or resource value is given, this function will attempt to + * invoke a `read()` method on it to read chunks of input text to incrementally + * compress. Reading will stop if the object's `read()` method returns + * either `null` or an empty string. + * + * Throws an exception on errors. + * + * Returns the compressed data. + * + * @function module:zlib#deflate + * + * @param {string} str_or_resource + * The string or resource object to be parsed as JSON. + * + * @param {?boolean} [gzip=false] + * Add a gzip header if true (creates a gzip-compliant output, otherwise defaults to Zlib) + * + * @param {?number} [level=Z_DEFAULT_COMPRESSION] + * The compression level (0-9). + * + * @returns {?string} + * + * @example + * // deflate content using default compression + * const deflated = deflate(content); + * + * // deflate content using fastest compression + * const deflated = deflate(content, Z_BEST_SPEED); + */ +static uc_value_t * +uc_zlib_deflate(uc_vm_t *vm, size_t nargs) +{ + uc_value_t *rv = NULL; + uc_value_t *src = uc_fn_arg(0); + uc_value_t *gzip = uc_fn_arg(1); + uc_value_t *level = uc_fn_arg(2); + uc_stringbuf_t *buf = NULL; + int ret, lvl = Z_DEFAULT_COMPRESSION; + bool gz = false; + z_stream strm = { + .zalloc = Z_NULL, + .zfree = Z_NULL, + .opaque = Z_NULL, + }; + + if (gzip) { + if (ucv_type(gzip) != UC_BOOLEAN) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Passed gzip flag is not a boolean"); + goto out; + } + + gz = (int)ucv_boolean_get(gzip); + } + + if (level) { + if (ucv_type(level) != UC_INTEGER) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Passed level is not a number"); + goto out; + } + + lvl = (int)ucv_int64_get(level); + } + + ret = deflateInit2(&strm, lvl, + Z_DEFLATED, // only allowed method + gz ? 15+16 : 15, // 15 Zlib default, +16 for gzip + 8, // default value + Z_DEFAULT_STRATEGY); // default value + if (ret != Z_OK) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "Zlib error: %s", ziniterr(ret)); + goto out; + } + + switch (ucv_type(src)) { + case UC_STRING: + buf = uc_zlib_def_string(vm, src, &strm); + break; + + case UC_RESOURCE: + case UC_OBJECT: + case UC_ARRAY: + buf = uc_zlib_def_object(vm, src, &strm); + break; + + default: + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Passed value is neither a string nor an object"); + goto out; + } + + if (!buf) { + if (vm->exception.type == EXCEPTION_NONE) // do not clobber previous exception + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "Zlib error: %s", strm.msg); + goto out; + } + + rv = ucv_stringbuf_finish(buf); + +out: + (void)deflateEnd(&strm); + return rv; +} + +static uc_stringbuf_t * +uc_zlib_inf_object(uc_vm_t *vm, uc_value_t *obj, z_stream *strm) +{ + int ret = Z_STREAM_ERROR; // error out if EOF on first loop + bool eof = false; + uc_value_t *rfn, *rbuf; + uc_stringbuf_t *buf = NULL; + + rfn = ucv_property_get(obj, "read"); + + if (!ucv_is_callable(rfn)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Input object does not implement read() method"); + return NULL; + } + + buf = ucv_stringbuf_new(); + + do { + rbuf = NULL; + uc_vm_stack_push(vm, ucv_get(obj)); + uc_vm_stack_push(vm, ucv_get(rfn)); + uc_vm_stack_push(vm, ucv_int64_new(CHUNK)); + + if (uc_vm_call(vm, true, 1) != EXCEPTION_NONE) + goto fail; + + rbuf = uc_vm_stack_pop(vm); // read output chunk + + /* we only accept strings */ + if (rbuf != NULL && ucv_type(rbuf) != UC_STRING) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Input object read() method returned non-string value"); + goto fail; + } + + /* check EOF */ + eof = (rbuf == NULL || ucv_string_length(rbuf) == 0); + if (eof) + break; + + strm->next_in = (unsigned char *)ucv_string_get(rbuf); + strm->avail_in = ucv_string_length(rbuf); + + /* run deflate() on input until output buffer not full */ + do { + // enlarge buf by CHUNK amount as needed + printbuf_memset(buf, printbuf_length(buf) + CHUNK - 1, 0, 1); + buf->bpos -= CHUNK; + + strm->avail_out = CHUNK; + strm->next_out = (unsigned char *)(buf->buf + buf->bpos);; + + ret = inflate(strm, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR); // state never clobbered (would be mem corruption) + switch (ret) { + case Z_NEED_DICT: + case Z_DATA_ERROR: + case Z_MEM_ERROR: + goto fail; + } + + // update bpos past data written by deflate() + buf->bpos += CHUNK - strm->avail_out; + } while (strm->avail_out == 0); + + ucv_put(rbuf); // release rbuf + } while (ret != Z_STREAM_END); // done when inflate() says it's done + + if (ret != Z_STREAM_END) { // data error + printbuf_free(buf); + buf = NULL; + } + + return buf; + +fail: + ucv_put(rbuf); + printbuf_free(buf); + return NULL; +} + +static uc_stringbuf_t * +uc_zlib_inf_string(uc_vm_t *vm, uc_value_t *str, z_stream *strm) +{ + int ret; + uc_stringbuf_t *buf = NULL; + + buf = ucv_stringbuf_new(); + + strm->next_in = (unsigned char *)ucv_string_get(str); + strm->avail_in = ucv_string_length(str); + + do { + printbuf_memset(buf, printbuf_length(buf) + CHUNK - 1, 0, 1); + buf->bpos -= CHUNK; + + strm->avail_out = CHUNK; + strm->next_out = (unsigned char *)(buf->buf + buf->bpos); + + ret = inflate(strm, Z_FINISH); + assert(ret != Z_STREAM_ERROR); + switch (ret) { + case Z_NEED_DICT: + case Z_DATA_ERROR: + case Z_MEM_ERROR: + printbuf_free(buf); + return NULL; + } + + buf->bpos += CHUNK - strm->avail_out; + } while (ret != Z_STREAM_END); + assert(strm->avail_in == 0); + + return buf; +} + +/** + * Decompresses data in Zlib or gzip format. + * + * If the input argument is a plain string, it is directly decompressed. + * + * If an array, object or resource value is given, this function will attempt to + * invoke a `read()` method on it to read chunks of input text to incrementally + * decompress. Reading will stop if the object's `read()` method returns + * either `null` or an empty string. + * + * Throws an exception on errors. + * + * Returns the decompressed data. + * + * @function module:zlib#inflate + * + * @param {string} str_or_resource + * The string or resource object to be parsed as JSON. + * + * @returns {?string} + */ +static uc_value_t * +uc_zlib_inflate(uc_vm_t *vm, size_t nargs) +{ + uc_value_t *rv = NULL; + uc_value_t *src = uc_fn_arg(0); + uc_stringbuf_t *buf = NULL; + int ret; + z_stream strm = { + .zalloc = Z_NULL, + .zfree = Z_NULL, + .opaque = Z_NULL, + .avail_in = 0, // must be initialized before call to inflateInit + .next_in = Z_NULL, // must be initialized before call to inflateInit + }; + + /* tell inflateInit2 to perform either zlib or gzip decompression: 15+32 */ + ret = inflateInit2(&strm, 15+32); + if (ret != Z_OK) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "Zlib error: %s", ziniterr(ret)); + goto out; + } + + switch (ucv_type(src)) { + case UC_STRING: + buf = uc_zlib_inf_string(vm, src, &strm); + break; + + case UC_RESOURCE: + case UC_OBJECT: + case UC_ARRAY: + buf = uc_zlib_inf_object(vm, src, &strm); + break; + + default: + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Passed value is neither a string nor an object"); + goto out; + } + + if (!buf) { + if (vm->exception.type == EXCEPTION_NONE) // do not clobber previous exception + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "Zlib error: %s", strm.msg); + goto out; + } + + rv = ucv_stringbuf_finish(buf); + +out: + (void)inflateEnd(&strm); + return rv; +} + +static const uc_function_list_t global_fns[] = { + { "deflate", uc_zlib_deflate }, + { "inflate", uc_zlib_inflate }, +}; + +void uc_module_init(uc_vm_t *vm, uc_value_t *scope) +{ + uc_function_list_register(scope, global_fns); + +#define ADD_CONST(x) ucv_object_add(scope, #x, ucv_int64_new(x)) + + /** + * @typedef + * @name Compression levels + * @description Constants representing predefined compression levels. + * @property {number} Z_NO_COMPRESSION. + * @property {number} Z_BEST_SPEED. + * @property {number} Z_BEST_COMPRESSION. + * @property {number} Z_DEFAULT_COMPRESSION - default compromise between speed and compression (currently equivalent to level 6). + */ + ADD_CONST(Z_NO_COMPRESSION); + ADD_CONST(Z_BEST_SPEED); + ADD_CONST(Z_BEST_COMPRESSION); + ADD_CONST(Z_DEFAULT_COMPRESSION); +} -- cgit v1.2.3