summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJo-Philipp Wich <jo@mein.io>2022-04-07 11:40:17 +0200
committerJo-Philipp Wich <jo@mein.io>2022-04-07 13:20:31 +0200
commit1032a67bba78c9124935bc356d764458146886c0 (patch)
treeb021f0d0ab3811d8f36bbed5f0942900448ca9d3
parentdf6b86173c4979aea636907b8f3fb4d0c855ea94 (diff)
lib: let `json()` accept input objects implementing `read()` method
Extend the `uc_json()` implementation to accept readable objects in addition to plain input strings. This allows parsing JSON input directly from open file handles, sockets or other kinds of producer objects without the need to store the entire JSON source string intermediately in memory. Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r--lib.c148
-rw-r--r--tests/custom/03_stdlib/34_json157
2 files changed, 284 insertions, 21 deletions
diff --git a/lib.c b/lib.c
index ebf6e61..667a12f 100644
--- a/lib.c
+++ b/lib.c
@@ -2073,31 +2073,144 @@ uc_replace(uc_vm_t *vm, size_t nargs)
return ucv_stringbuf_finish(resbuf);
}
-static uc_value_t *
-uc_json(uc_vm_t *vm, size_t nargs)
+static struct json_tokener *
+uc_json_from_object(uc_vm_t *vm, uc_value_t *obj, json_object **jso)
{
- uc_value_t *rv = NULL, *src = uc_fn_arg(0);
- struct json_tokener *tok = NULL;
+ bool trail = false, eof = false;
enum json_tokener_error err;
- json_object *jso = NULL;
- const char *str;
- size_t len;
+ struct json_tokener *tok;
+ uc_value_t *rfn, *rbuf;
+ uc_stringbuf_t *buf;
+
+ rfn = ucv_property_get(obj, "read");
- if (ucv_type(src) != UC_STRING) {
+ if (!ucv_is_callable(rfn)) {
uc_vm_raise_exception(vm, EXCEPTION_TYPE,
- "Passed value is not a string");
+ "Input object does not implement read() method");
return NULL;
}
tok = xjs_new_tokener();
- str = ucv_string_get(src);
- len = ucv_string_length(src);
+
+ while (true) {
+ uc_vm_stack_push(vm, ucv_get(obj));
+ uc_vm_stack_push(vm, ucv_get(rfn));
+ uc_vm_stack_push(vm, ucv_int64_new(1024));
+
+ if (uc_vm_call(vm, true, 1) != EXCEPTION_NONE) {
+ json_tokener_free(tok);
+
+ return NULL;
+ }
+
+ rbuf = uc_vm_stack_pop(vm);
+
+ /* check EOF */
+ eof = (rbuf == NULL || (ucv_type(rbuf) == UC_STRING && ucv_string_length(rbuf) == 0));
+
+ /* on EOF, stop parsing unless trailing garbage was detected, which is handled below */
+ if (eof && !trail) {
+ ucv_put(rbuf);
+
+ /* Didn't parse a complete object yet, possibly a non-delimited atomic value
+ such as `null`, `true` etc. - nudge parser by sending final zero byte.
+ See json-c issue #681 <https://github.com/json-c/json-c/issues/681> */
+ if (json_tokener_get_error(tok) == json_tokener_continue)
+ *jso = json_tokener_parse_ex(tok, "\0", 1);
+
+ break;
+ }
+
+ if (trail || *jso) {
+ uc_vm_raise_exception(vm, EXCEPTION_SYNTAX,
+ "Trailing garbage after JSON data");
+
+ json_tokener_free(tok);
+ ucv_put(rbuf);
+
+ return NULL;
+ }
+
+ if (ucv_type(rbuf) != UC_STRING) {
+ buf = xprintbuf_new();
+ ucv_to_stringbuf_formatted(vm, buf, rbuf, 0, '\0', 0);
+
+ *jso = json_tokener_parse_ex(tok, buf->buf, printbuf_length(buf));
+
+ trail = (json_tokener_get_error(tok) == json_tokener_success &&
+ json_tokener_get_parse_end(tok) < (size_t)printbuf_length(buf));
+
+ printbuf_free(buf);
+ }
+ else {
+ *jso = json_tokener_parse_ex(tok, ucv_string_get(rbuf), ucv_string_length(rbuf));
+
+ trail = (json_tokener_get_error(tok) == json_tokener_success &&
+ json_tokener_get_parse_end(tok) < ucv_string_length(rbuf));
+ }
+
+ ucv_put(rbuf);
+
+ err = json_tokener_get_error(tok);
+
+ if (err != json_tokener_success && err != json_tokener_continue)
+ break;
+ }
+
+ return tok;
+}
+
+static struct json_tokener *
+uc_json_from_string(uc_vm_t *vm, uc_value_t *str, json_object **jso)
+{
+ struct json_tokener *tok = xjs_new_tokener();
/* NB: the len + 1 here is intentional to pass the terminating \0 byte
* to the json-c parser. This is required to work-around upstream
* issue #681 <https://github.com/json-c/json-c/issues/681> */
- jso = json_tokener_parse_ex(tok, str, len + 1);
+ *jso = json_tokener_parse_ex(tok, ucv_string_get(str), ucv_string_length(str) + 1);
+
+ if (json_tokener_get_error(tok) == json_tokener_success &&
+ json_tokener_get_parse_end(tok) < ucv_string_length(str)) {
+ uc_vm_raise_exception(vm, EXCEPTION_SYNTAX,
+ "Trailing garbage after JSON data");
+
+ json_tokener_free(tok);
+
+ return NULL;
+ }
+
+ return tok;
+}
+
+static uc_value_t *
+uc_json(uc_vm_t *vm, size_t nargs)
+{
+ uc_value_t *rv = NULL, *src = uc_fn_arg(0);
+ struct json_tokener *tok = NULL;
+ enum json_tokener_error err;
+ json_object *jso = NULL;
+
+ switch (ucv_type(src)) {
+ case UC_STRING:
+ tok = uc_json_from_string(vm, src, &jso);
+ break;
+
+ case UC_RESOURCE:
+ case UC_OBJECT:
+ case UC_ARRAY:
+ tok = uc_json_from_object(vm, src, &jso);
+ break;
+
+ default:
+ uc_vm_raise_exception(vm, EXCEPTION_TYPE,
+ "Passed value is neither a string nor an object");
+ }
+
+ if (!tok)
+ goto out;
+
err = json_tokener_get_error(tok);
if (err == json_tokener_continue) {
@@ -2113,18 +2226,13 @@ uc_json(uc_vm_t *vm, size_t nargs)
goto out;
}
- else if (json_tokener_get_parse_end(tok) < len) {
- uc_vm_raise_exception(vm, EXCEPTION_SYNTAX,
- "Trailing garbage after JSON data");
-
- goto out;
- }
-
rv = ucv_from_json(vm, jso);
out:
- json_tokener_free(tok);
+ if (tok)
+ json_tokener_free(tok);
+
json_object_put(jso);
return rv;
diff --git a/tests/custom/03_stdlib/34_json b/tests/custom/03_stdlib/34_json
index ba8ad9f..c30d6d0 100644
--- a/tests/custom/03_stdlib/34_json
+++ b/tests/custom/03_stdlib/34_json
@@ -49,7 +49,7 @@ Passing a non-string value throws an exception.
-- End --
-- Expect stderr --
-Type error: Passed value is not a string
+Type error: Passed value is neither a string nor an object
In line 2, byte 11:
` json(true);`
@@ -108,3 +108,158 @@ In line 2, byte 28:
-- End --
+
+
+Additionally, `json()` accepts objects implementing a read method as input.
+During JSON parsing, the read method is repeatedly invoked with a buffer size
+hint as sole argument. The return value of the read method is converted to a
+string if needed and passed on to the JSON parser. A `null` or an empty string
+return value is treated as EOF, ending the parse process.
+
+-- Testcase --
+{%
+ let fs = require("fs");
+
+ // parse JSON from open file handle
+ printf("%.J\n",
+ json(fs.open("files/test.json"))
+ );
+%}
+-- End --
+
+-- Expect stdout --
+{
+ "hello": "world"
+}
+-- End --
+
+-- File test.json --
+{"hello":"world"}
+-- End --
+
+
+The `json()` function is able to parse JSON from any object providing a `read()`
+method that incrementally yields JSON source data.
+
+-- Testcase --
+{%
+ let parts = [
+ '{"some"',
+ ':',
+ '"object"',
+ ', ',
+ '"etc."',
+ ':',
+ !0, // this is stringified to "true"
+ '}'
+ ];
+
+ let producer = {
+ read: function(size) {
+ return shift(parts);
+ }
+ };
+
+ // parse JSON from producer object
+ printf("%.J\n",
+ json(producer)
+ );
+%}
+-- End --
+
+-- Expect stdout --
+{
+ "some": "object",
+ "etc.": true
+}
+-- End --
+
+
+Passing objects or resources not providing a `read()` method yields an exception.
+
+-- Testcase --
+{%
+ json({});
+%}
+-- End --
+
+-- Expect stderr --
+Type error: Input object does not implement read() method
+In line 2, byte 9:
+
+ ` json({});`
+ Near here -^
+
+
+-- End --
+
+
+Exceptions triggered by the `read()` method are properly forwarded.
+
+-- Testcase --
+{%
+ json({
+ read: function() {
+ die("Exception in read()");
+ }
+ });
+%}
+-- End --
+
+-- Expect stderr --
+Exception in read()
+In [anonymous function](), line 4, byte 29:
+ called from function json ([C])
+ called from anonymous function ([stdin]:6:3)
+
+ ` die("Exception in read()");`
+ Near here ---------------------------^
+
+
+-- End --
+
+
+EOF stops parsing and does not lead to further `read()` invocations.
+
+-- Testcase --
+{%
+ let parts = [
+ '["some",',
+ '"JSON array",',
+ 'true,false,1,2,3',
+ ']',
+ '', // empty string treated as EOF
+ '{"some":', // this is not reached in the first pass
+ '"object"}',
+ null, // null treated as EOF
+ '"test ', // this is not reached in the second pass
+ 'value"'
+ ];
+
+ let producer = { read: () => shift(parts) };
+
+ printf("%.J\n", [
+ json(producer),
+ json(producer),
+ json(producer)
+ ]);
+%}
+-- End --
+
+-- Expect stdout --
+[
+ [
+ "some",
+ "JSON array",
+ true,
+ false,
+ 1,
+ 2,
+ 3
+ ],
+ {
+ "some": "object"
+ },
+ "test value"
+]
+-- End --