From 1032a67bba78c9124935bc356d764458146886c0 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Thu, 7 Apr 2022 11:40:17 +0200 Subject: lib: let `json()` accept input objects implementing `read()` method Extend the `uc_json()` implementation to accept readable objects in addition to plain input strings. This allows parsing JSON input directly from open file handles, sockets or other kinds of producer objects without the need to store the entire JSON source string intermediately in memory. Signed-off-by: Jo-Philipp Wich --- lib.c | 148 ++++++++++++++++++++++++++++++++------ tests/custom/03_stdlib/34_json | 157 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 284 insertions(+), 21 deletions(-) diff --git a/lib.c b/lib.c index ebf6e61..667a12f 100644 --- a/lib.c +++ b/lib.c @@ -2073,31 +2073,144 @@ uc_replace(uc_vm_t *vm, size_t nargs) return ucv_stringbuf_finish(resbuf); } -static uc_value_t * -uc_json(uc_vm_t *vm, size_t nargs) +static struct json_tokener * +uc_json_from_object(uc_vm_t *vm, uc_value_t *obj, json_object **jso) { - uc_value_t *rv = NULL, *src = uc_fn_arg(0); - struct json_tokener *tok = NULL; + bool trail = false, eof = false; enum json_tokener_error err; - json_object *jso = NULL; - const char *str; - size_t len; + struct json_tokener *tok; + uc_value_t *rfn, *rbuf; + uc_stringbuf_t *buf; + + rfn = ucv_property_get(obj, "read"); - if (ucv_type(src) != UC_STRING) { + if (!ucv_is_callable(rfn)) { uc_vm_raise_exception(vm, EXCEPTION_TYPE, - "Passed value is not a string"); + "Input object does not implement read() method"); return NULL; } tok = xjs_new_tokener(); - str = ucv_string_get(src); - len = ucv_string_length(src); + + while (true) { + uc_vm_stack_push(vm, ucv_get(obj)); + uc_vm_stack_push(vm, ucv_get(rfn)); + uc_vm_stack_push(vm, ucv_int64_new(1024)); + + if (uc_vm_call(vm, true, 1) != EXCEPTION_NONE) { + json_tokener_free(tok); + + return NULL; + } + + rbuf = uc_vm_stack_pop(vm); + + /* check EOF */ + eof = (rbuf == NULL || 
(ucv_type(rbuf) == UC_STRING && ucv_string_length(rbuf) == 0)); + + /* on EOF, stop parsing unless trailing garbage was detected which is handled below */ + if (eof && !trail) { + ucv_put(rbuf); + + /* Didn't parse a complete object yet, possibly a non-delimited atomic value + such as `null`, `true` etc. - nudge parser by sending final zero byte. + See json-c issue #681 */ + if (json_tokener_get_error(tok) == json_tokener_continue) + *jso = json_tokener_parse_ex(tok, "\0", 1); + + break; + } + + if (trail || *jso) { + uc_vm_raise_exception(vm, EXCEPTION_SYNTAX, + "Trailing garbage after JSON data"); + + json_tokener_free(tok); + ucv_put(rbuf); + + return NULL; + } + + if (ucv_type(rbuf) != UC_STRING) { + buf = xprintbuf_new(); + ucv_to_stringbuf_formatted(vm, buf, rbuf, 0, '\0', 0); + + *jso = json_tokener_parse_ex(tok, buf->buf, printbuf_length(buf)); + + trail = (json_tokener_get_error(tok) == json_tokener_success && + json_tokener_get_parse_end(tok) < (size_t)printbuf_length(buf)); + + printbuf_free(buf); + } + else { + *jso = json_tokener_parse_ex(tok, ucv_string_get(rbuf), ucv_string_length(rbuf)); + + trail = (json_tokener_get_error(tok) == json_tokener_success && + json_tokener_get_parse_end(tok) < ucv_string_length(rbuf)); + } + + ucv_put(rbuf); + + err = json_tokener_get_error(tok); + + if (err != json_tokener_success && err != json_tokener_continue) + break; + } + + return tok; +} + +static struct json_tokener * +uc_json_from_string(uc_vm_t *vm, uc_value_t *str, json_object **jso) +{ + struct json_tokener *tok = xjs_new_tokener(); /* NB: the len + 1 here is intentional to pass the terminating \0 byte * to the json-c parser. 
This is required to work-around upstream * issue #681 */ - jso = json_tokener_parse_ex(tok, str, len + 1); + *jso = json_tokener_parse_ex(tok, ucv_string_get(str), ucv_string_length(str) + 1); + + if (json_tokener_get_error(tok) == json_tokener_success && + json_tokener_get_parse_end(tok) < ucv_string_length(str)) { + uc_vm_raise_exception(vm, EXCEPTION_SYNTAX, + "Trailing garbage after JSON data"); + + json_tokener_free(tok); + + return NULL; + } + + return tok; +} + +static uc_value_t * +uc_json(uc_vm_t *vm, size_t nargs) +{ + uc_value_t *rv = NULL, *src = uc_fn_arg(0); + struct json_tokener *tok = NULL; + enum json_tokener_error err; + json_object *jso = NULL; + + switch (ucv_type(src)) { + case UC_STRING: + tok = uc_json_from_string(vm, src, &jso); + break; + + case UC_RESOURCE: + case UC_OBJECT: + case UC_ARRAY: + tok = uc_json_from_object(vm, src, &jso); + break; + + default: + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Passed value is neither a string nor an object"); + } + + if (!tok) + goto out; + err = json_tokener_get_error(tok); if (err == json_tokener_continue) { @@ -2113,18 +2226,13 @@ uc_json(uc_vm_t *vm, size_t nargs) goto out; } - else if (json_tokener_get_parse_end(tok) < len) { - uc_vm_raise_exception(vm, EXCEPTION_SYNTAX, - "Trailing garbage after JSON data"); - - goto out; - } - rv = ucv_from_json(vm, jso); out: - json_tokener_free(tok); + if (tok) + json_tokener_free(tok); + json_object_put(jso); return rv; diff --git a/tests/custom/03_stdlib/34_json b/tests/custom/03_stdlib/34_json index ba8ad9f..c30d6d0 100644 --- a/tests/custom/03_stdlib/34_json +++ b/tests/custom/03_stdlib/34_json @@ -49,7 +49,7 @@ Passing a non-string value throws an exception. 
-- End -- -- Expect stderr -- -Type error: Passed value is not a string +Type error: Passed value is neither a string nor an object In line 2, byte 11: ` json(true);` @@ -108,3 +108,158 @@ In line 2, byte 28: -- End -- + + +Additionally, `json()` accepts objects implementing a read method as input. +During JSON parsing, the read method is repeatedly invoked with a buffer size +hint as sole argument. The return value of the read method is converted to a +string if needed and passed on to the JSON parser. A `null` or an empty string +return value is treated as EOF, ending the parse process. + +-- Testcase -- +{% + let fs = require("fs"); + + // parse JSON from open file handle + printf("%.J\n", + json(fs.open("files/test.json")) + ); +%} +-- End -- + +-- Expect stdout -- +{ + "hello": "world" +} +-- End -- + +-- File test.json -- +{"hello":"world"} +-- End -- + + +The `json()` function is able to parse JSON from any object providing a `read()` +method that incrementally yields JSON source data. + +-- Testcase -- +{% + let parts = [ + '{"some"', + ':', + '"object"', + ', ', + '"etc."', + ':', + !0, // this is stringified to "true" + '}' + ]; + + let producer = { + read: function(size) { + return shift(parts); + } + }; + + // parse JSON from producer object + printf("%.J\n", + json(producer) + ); +%} +-- End -- + +-- Expect stdout -- +{ + "some": "object", + "etc.": true +} +-- End -- + + +Passing objects or resources not providing a `read()` method yields an exception. + +-- Testcase -- +{% + json({}); +%} +-- End -- + +-- Expect stderr -- +Type error: Input object does not implement read() method +In line 2, byte 9: + + ` json({});` + Near here -^ + + +-- End -- + + +Exceptions triggered by the `read()` method are properly forwarded. 
+ +-- Testcase -- +{% + json({ + read: function() { + die("Exception in read()"); + } + }); +%} +-- End -- + +-- Expect stderr -- +Exception in read() +In [anonymous function](), line 4, byte 29: + called from function json ([C]) + called from anonymous function ([stdin]:6:3) + + ` die("Exception in read()");` + Near here ---------------------------^ + + +-- End -- + + +EOF stops parsing and does not lead to further `read()` invocations. + +-- Testcase -- +{% + let parts = [ + '["some",', + '"JSON array",', + 'true,false,1,2,3', + ']', + '', // empty string treated as EOF + '{"some":', // this is not reached in the first pass + '"object"}', + null, // null treated as EOF + '"test ', // this is not reached in the second pass + 'value"' + ]; + + let producer = { read: () => shift(parts) }; + + printf("%.J\n", [ + json(producer), + json(producer), + json(producer) + ]); +%} +-- End -- + +-- Expect stdout -- +[ + [ + "some", + "JSON array", + true, + false, + 1, + 2, + 3 + ], + { + "some": "object" + }, + "test value" +] +-- End -- -- cgit v1.2.3