diff options
-rw-r--r-- | README.md | 29 | ||||
-rw-r--r-- | compiler.c | 7 | ||||
-rw-r--r-- | lib.c | 153 | ||||
-rw-r--r-- | lib/fs.c | 3 | ||||
-rw-r--r-- | tests/custom/03_stdlib/56_hexdec | 29 | ||||
-rw-r--r-- | tests/custom/03_stdlib/57_hexenc | 24 | ||||
-rw-r--r-- | tests/custom/03_stdlib/58_index | 48 | ||||
-rw-r--r-- | tests/custom/03_stdlib/59_rindex | 48 | ||||
-rw-r--r-- | tests/custom/04_bugs/37_compiler_unexpected_unary_op | 21 | ||||
-rw-r--r-- | tests/custom/04_bugs/38_index_segfault | 28 |
10 files changed, 375 insertions, 15 deletions
@@ -1473,3 +1473,32 @@ does not implement this clock type. clock(); // [ 1647954926, 798269464 ] clock(true); // [ 474751, 527959975 ] ``` + +#### 6.70. `hexdec(hexstring[, skipchars])` + +The `hexdec()` function decodes the given hexadecimal digit string into +a byte string, optionally skipping specified characters. + +If the characters to skip are not specified, a default of `" \t\n"` is +used. + +Returns null if the input string contains invalid characters or an uneven +amount of hex digits. + +Returns the decoded byte string on success. + +```javascript +hexdec("48656c6c6f20776f726c64210a"); // "Hello world!\n" +hexdec("44:55:66:77:33:44", ":"); // "DUfw3D" +``` + +#### 6.71. `hexenc(val)` + +The `hexenc()` function encodes the given byte string into a hexadecimal +digit string, converting the input value to a string if needed. + +Returns the encoded hexadecimal digit string. + +```javascript +hexenc("Hello world!\n"); // "48656c6c6f20776f726c64210a" +``` @@ -372,6 +372,13 @@ uc_compiler_parse_precedence(uc_compiler_t *compiler, uc_precedence_t precedence rule = uc_compiler_parse_rule(compiler->exprstack->token); + if (!rule->infix) { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, "Expecting ';' or binary operator"); + uc_compiler_parse_advance(compiler); + + return; + } + /* allow reserved words in property accessors */ if (rule->infix == uc_compiler_compile_dot) compiler->parser->lex.no_keyword = true; @@ -285,39 +285,68 @@ uc_length(uc_vm_t *vm, size_t nargs) } } +static int +uc_uniq_ucv_equal(const void *k1, const void *k2); + static uc_value_t * uc_index(uc_vm_t *vm, size_t nargs, bool right) { uc_value_t *stack = uc_fn_arg(0); uc_value_t *needle = uc_fn_arg(1); const char *sstr, *nstr, *p; - size_t arridx, len; + size_t arridx, slen, nlen; ssize_t ret = -1; switch (ucv_type(stack)) { case UC_ARRAY: - for (arridx = 0, len = ucv_array_length(stack); arridx < len; arridx++) { - if (ucv_compare(I_EQ, ucv_array_get(stack, arridx), needle, NULL)) { - ret = (ssize_t)arridx; - - if (!right) + if (right) { + for (arridx = ucv_array_length(stack); arridx > 0; arridx--) { + if (uc_uniq_ucv_equal(ucv_array_get(stack, arridx - 1), needle)) { + ret = (ssize_t)(arridx - 1); break; + } + } + } + else { + for (arridx = 0, slen = ucv_array_length(stack); arridx < slen; arridx++) { + if (uc_uniq_ucv_equal(ucv_array_get(stack, arridx), needle)) { + ret = (ssize_t)arridx; + break; + } } } return ucv_int64_new(ret); case UC_STRING: - sstr = ucv_string_get(stack); - nstr = needle ? ucv_string_get(needle) : NULL; - len = needle ? strlen(nstr) : 0; - - for (p = sstr; *p && len; p++) { - if (!strncmp(p, nstr, len)) { - ret = (ssize_t)(p - sstr); + if (ucv_type(needle) == UC_STRING) { + sstr = ucv_string_get(stack); + slen = ucv_string_length(stack); + nstr = ucv_string_get(needle); + nlen = ucv_string_length(needle); + + if (slen == nlen) { + if (memcmp(sstr, nstr, nlen) == 0) + ret = 0; + } + else if (slen > nlen) { + if (right) { + p = sstr + slen - nlen; + + do { + if (memcmp(p, nstr, nlen) == 0) { + ret = (ssize_t)(p - sstr); + break; + } + } + while (--p != sstr); + } + else { + p = (const char *)memmem(sstr, slen, nstr, nlen); - if (!right) - break; + if (p) + ret = (ssize_t)(p - sstr); + } } } @@ -3352,6 +3381,98 @@ uc_clock(uc_vm_t *vm, size_t nargs) return res; } +static uc_value_t * +uc_hexenc(uc_vm_t *vm, size_t nargs) +{ + const char *hex = "0123456789abcdef"; + uc_value_t *input = uc_fn_arg(0); + uc_stringbuf_t *buf; + size_t off, len; + uint8_t byte; + + if (!input) + return NULL; + + buf = ucv_stringbuf_new(); + off = printbuf_length(buf); + + ucv_to_stringbuf(vm, buf, input, false); + + len = printbuf_length(buf) - off; + + /* memset the last expected output char to grow the output buffer */ + printbuf_memset(buf, off + len * 2, 0, 1); + + /* translate string into hex back to front to reuse the same buffer */ + while (len > 0) { + byte = buf->buf[--len + off]; + buf->buf[off + len * 2 + 0] = hex[byte / 16]; + buf->buf[off + len * 2 + 1] = hex[byte % 16]; + } + + /* do not include sentinel `\0` in string length */ + buf->bpos--; + + return ucv_stringbuf_finish(buf); +} + +static inline uint8_t +hexval(unsigned char c, bool lo) +{ + return ((c > '9') ? (c - 'a') + 10 : c - '0') << (lo ? 0 : 4); +} + +static uc_value_t * +uc_hexdec(uc_vm_t *vm, size_t nargs) +{ + uc_value_t *input = uc_fn_arg(0); + uc_value_t *skip = uc_fn_arg(1); + size_t len, off, n, i; + uc_stringbuf_t *buf; + unsigned char *p; + const char *s; + + if (ucv_type(input) != UC_STRING) + return NULL; + + if (skip && ucv_type(skip) != UC_STRING) + return NULL; + + p = (unsigned char *)ucv_string_get(input); + len = ucv_string_length(input); + + s = skip ? (const char *)ucv_string_get(skip) : " \t\n"; + + for (i = 0, n = 0; i < len; i++) { + if (isxdigit(p[i])) + n++; + else if (!s || !strchr(s, p[i])) + return NULL; + } + + if (n & 1) + return NULL; + + buf = ucv_stringbuf_new(); + off = printbuf_length(buf); + + /* preallocate the output buffer */ + printbuf_memset(buf, off, 0, n / 2 + 1); + + for (i = 0, n = 0; i < len; i++) { + if (!isxdigit(p[i])) + continue; + + buf->buf[off + (n >> 1)] |= hexval(p[i] | 32, n & 1); + n++; + } + + /* do not include sentinel `\0` in string length */ + buf->bpos--; + + return ucv_stringbuf_finish(buf); +} + const uc_function_list_t uc_stdlib_functions[] = { { "chr", uc_chr }, @@ -3417,6 +3538,8 @@ const uc_function_list_t uc_stdlib_functions[] = { { "timelocal", uc_timelocal }, { "timegm", uc_timegm }, { "clock", uc_clock }, + { "hexdec", uc_hexdec }, + { "hexenc", uc_hexenc }, }; @@ -1168,6 +1168,9 @@ uc_fs_readfile(uc_vm_t *vm, size_t nargs) buf = ucv_stringbuf_new(); + if (limit > -1 && limit < BUFSIZ) + setvbuf(fp, NULL, _IONBF, 0); + while (limit != 0) { blen = 1024; diff --git a/tests/custom/03_stdlib/56_hexdec b/tests/custom/03_stdlib/56_hexdec new file mode 100644 index 0000000..cb842ca --- /dev/null +++ b/tests/custom/03_stdlib/56_hexdec @@ -0,0 +1,29 @@ +The `hexdec()` function decodes the given hexadecimal digit string into +a byte string, optionally skipping specified characters. + +Returns null if the input string contains invalid characters or an uneven +amount of hex digits. + +Returns the decoded byte string on success. + +-- Testcase -- +{% + printf("%.J\n", [ + hexdec("44 55 66 77 33 44\n"), // whitespace is skipped by default + hexdec("44-55-66:77-33-44", ":-"), // skip specified characters + hexdec("abc"), // error; uneven amount of digits + hexdec("ab cd !"), // error; non-whitespace, non-hex, non-skipped char + hexdec(1234), // error; non-string input + ]); +%} +-- End -- + +-- Expect stdout -- +[ + "DUfw3D", + "DUfw3D", + null, + null, + null +] +-- End -- diff --git a/tests/custom/03_stdlib/57_hexenc b/tests/custom/03_stdlib/57_hexenc new file mode 100644 index 0000000..235ad66 --- /dev/null +++ b/tests/custom/03_stdlib/57_hexenc @@ -0,0 +1,24 @@ +The `hexenc()` function encodes the given byte string into a hexadecimal +digit string, converting the input value to a string if needed. + +Returns the encoded hexadecimal digit string. + +-- Testcase -- +{% + printf("%.J\n", [ + hexenc("Hello world!\n"), // encoding a simple string + hexenc(""), // empty input -> empty output + hexenc([1, 2, 3]), // implicit stringification + hexenc(null), // null input -> null output + ]); +%} +-- End -- + +-- Expect stdout -- +[ + "48656c6c6f20776f726c64210a", + "", + "5b20312c20322c2033205d", + null +] +-- End -- diff --git a/tests/custom/03_stdlib/58_index b/tests/custom/03_stdlib/58_index new file mode 100644 index 0000000..30c5146 --- /dev/null +++ b/tests/custom/03_stdlib/58_index @@ -0,0 +1,48 @@ +The `index()` function locates an element within a given array or a substring +position within a given string, depending on the type of arguments given. + +Returns `null` if the given haystack argument is neither an array nor a string, +returns `-1` if the element was not found within the array or the substring was +not found within the string. + +Returns the first found index position in all other cases. + +-- Testcase -- +{% + let o = {}; + + printf("%.J\n", [ + index([ 1, 2, "abc", 3, "abc", 1, 2 ], "abc"), // should return 2 + index([ 1, 2, 3 ], 4), // should return -1 + index([ [], {} ], {}), // should return -1 (strict equality) + index([ [], o ], o), // should return 1 (strict equality) + + index("foobarfoobarfoobar", "arf"), // should return 4 + index("test", "hello"), // should return -1 + index("test", "test"), // should return 0 (needle = haystack length special case) + index("test", ""), // should return 0 (zero length needle special case) + index("", ""), // should return 0 (zero length special case) + index("foo\0foo\0foo", "o\0f"), // should return 2 (binary safe) + + index({ test: true }, true), // should return null + index(1234, 3), // should return null + ]); +%} +-- End -- + +-- Expect stdout -- +[ + 2, + -1, + -1, + 1, + 4, + -1, + 0, + 0, + 0, + 2, + null, + null +] +-- End -- diff --git a/tests/custom/03_stdlib/59_rindex b/tests/custom/03_stdlib/59_rindex new file mode 100644 index 0000000..35f8d2d --- /dev/null +++ b/tests/custom/03_stdlib/59_rindex @@ -0,0 +1,48 @@ +The `rindex()` function locates an element within a given array or a substring +position within a given string, depending on the type of arguments given. + +Returns `null` if the given haystack argument is neither an array nor a string, +returns `-1` if the element was not found within the array or the substring was +not found within the string. + +Returns the last found index position in all other cases. + +-- Testcase -- +{% + let o = {}; + + printf("%.J\n", [ + rindex([ 1, 2, "abc", 3, "abc", 1, 2 ], "abc"), // should return 4 + rindex([ 1, 2, 3 ], 4), // should return -1 + rindex([ [], {} ], {}), // should return -1 (strict equality) + rindex([ [], o ], o), // should return 1 (strict equality) + + rindex("foobarfoobarfoobar", "arf"), // should return 10 + rindex("test", "hello"), // should return -1 + rindex("test", "test"), // should return 0 (needle = haystack length special case) + rindex("test", ""), // should return 4 (zero length needle special case) + rindex("", ""), // should return 0 (zero length special case) + rindex("foo\0foo\0foo", "o\0f"), // should return 6 (binary safe) + + rindex({ test: true }, true), // should return null + rindex(1234, 3), // should return null + ]); +%} +-- End -- + +-- Expect stdout -- +[ + 4, + -1, + -1, + 1, + 10, + -1, + 0, + 4, + 0, + 6, + null, + null +] +-- End -- diff --git a/tests/custom/04_bugs/37_compiler_unexpected_unary_op b/tests/custom/04_bugs/37_compiler_unexpected_unary_op new file mode 100644 index 0000000..e652319 --- /dev/null +++ b/tests/custom/04_bugs/37_compiler_unexpected_unary_op @@ -0,0 +1,21 @@ +When compiling expressions followed by a unary operator, the compiler +triggered a segmentation fault due to invoking an unset infix parser +routine. + +-- Testcase -- +1~1 +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Expecting ';' or binary operator +In line 1, byte 2: + + `1~1` + ^-- Near here + + +-- End -- diff --git a/tests/custom/04_bugs/38_index_segfault b/tests/custom/04_bugs/38_index_segfault new file mode 100644 index 0000000..e29b99f --- /dev/null +++ b/tests/custom/04_bugs/38_index_segfault @@ -0,0 +1,28 @@ +When index() or rindex() was invoked with a string haystack and a non- +string needle argument, a segmentation fault occurred due to an internal +strlen() invocation on a NULL pointer. + +-- Testcase -- +print(index("abc", []), "\n") +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +-1 +-- End -- + + +-- Testcase -- +print(rindex("abc", []), "\n") +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +-1 +-- End -- |