summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJo-Philipp Wich <jo@mein.io>2022-01-07 11:00:49 +0100
committerJo-Philipp Wich <jo@mein.io>2022-01-07 11:37:49 +0100
commiteafa321279778d737861c57437fe4fc14c26d36e (patch)
tree288ee5dd0c56cbf00025825691154568a6c23dce
parent1377e23afff90128b18ac60c10071295fc0afbab (diff)
lib: implement uniq() function
The uniq() function allows extracting all unique values from a given input array in an efficient manner. It is roughly equivalent to the following ucode idiom: let seen = {} let unique = filter(array, item => !seen[item]++); In contrast to the code above, `uniq()` does not rely on implicit stringification of item values but performs strict equality tests internally. If equivalence of stringified results is desired, the following code can be used: let unique = uniq(map(array, item => "" + item)); Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r--README.md13
-rw-r--r--lib.c99
2 files changed, 111 insertions, 1 deletions
diff --git a/README.md b/README.md
index 524f0c0..a078ce4 100644
--- a/README.md
+++ b/README.md
@@ -1254,3 +1254,16 @@ If a non-string argument is given, the function returns `null`.
b64enc("This is a test"); // "VGhpcyBpcyBhIHRlc3Q="
b64enc(123); // null
```
+
+#### 6.64. `uniq(array)`
+
+Returns a new array containing all unique values of the given input
+array. The order is preserved, that is subsequent duplicate values
+are simply skipped.
+
+If a non-array argument is given, the function returns `null`.
+
+```javascript
+uniq([ 1, true, "foo", 2, true, "bar", "foo" ]); // [ 1, true, "foo", 2, "bar" ]
+uniq("test"); // null
+```
diff --git a/lib.c b/lib.c
index 9a8f6b8..7ded088 100644
--- a/lib.c
+++ b/lib.c
@@ -31,6 +31,7 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <fnmatch.h>
+#include <assert.h>
#include "ucode/lexer.h"
#include "ucode/compiler.h"
@@ -2814,6 +2815,101 @@ uc_b64enc(uc_vm_t *vm, size_t nargs)
* -------------------------------------------------------------------------
*/
+static unsigned long
+uc_uniq_ucv_hash(const void *k)
+{
+ union { double d; int64_t i; uint64_t u; } conv;
+ uc_value_t *uv = (uc_value_t *)k;
+ unsigned int h;
+ uint8_t *u8;
+ size_t len;
+
+ h = ucv_type(uv);
+
+ switch (h) {
+ case UC_STRING:
+ u8 = (uint8_t *)ucv_string_get(uv);
+ len = ucv_string_length(uv);
+ break;
+
+ case UC_INTEGER:
+ conv.i = ucv_int64_get(uv);
+
+ if (errno == ERANGE) {
+ h *= 2;
+ conv.u = ucv_uint64_get(uv);
+ }
+
+ u8 = (uint8_t *)&conv.u;
+ len = sizeof(conv.u);
+ break;
+
+ case UC_DOUBLE:
+ conv.d = ucv_double_get(uv);
+
+ u8 = (uint8_t *)&conv.u;
+ len = sizeof(conv.u);
+ break;
+
+ default:
+ u8 = (uint8_t *)&uv;
+ len = sizeof(uv);
+ break;
+ }
+
+ while (len > 0) {
+ h = h * 129 + (*u8++) + LH_PRIME;
+ len--;
+ }
+
+ return h;
+}
+
+static int
+uc_uniq_ucv_equal(const void *k1, const void *k2)
+{
+ uc_value_t *uv1 = (uc_value_t *)k1;
+ uc_value_t *uv2 = (uc_value_t *)k2;
+
+ if (!ucv_is_scalar(uv1) && !ucv_is_scalar(uv2))
+ return (uv1 == uv2);
+
+ return ucv_is_equal(uv1, uv2);
+}
+
+static uc_value_t *
+uc_uniq(uc_vm_t *vm, size_t nargs)
+{
+ uc_value_t *list = uc_fn_arg(0);
+ uc_value_t *uniq = NULL;
+ struct lh_table *seen;
+ unsigned long hash;
+ uc_value_t *item;
+ size_t i, len;
+
+ if (ucv_type(list) != UC_ARRAY)
+ return NULL;
+
+ seen = lh_table_new(16, NULL, uc_uniq_ucv_hash, uc_uniq_ucv_equal);
+ uniq = ucv_array_new(vm);
+
+ assert(seen && uniq);
+
+ for (i = 0, len = ucv_array_length(list); i < len; i++) {
+ item = ucv_array_get(list, i);
+ hash = lh_get_hash(seen, item);
+
+ if (!lh_table_lookup_entry_w_hash(seen, item, hash)) {
+ lh_table_insert_w_hash(seen, item, NULL, hash, 0);
+ ucv_array_push(uniq, ucv_get(item));
+ }
+ }
+
+ lh_table_free(seen);
+
+ return uniq;
+}
+
const uc_function_list_t uc_stdlib_functions[] = {
{ "chr", uc_chr },
@@ -2872,7 +2968,8 @@ const uc_function_list_t uc_stdlib_functions[] = {
{ "min", uc_min },
{ "max", uc_max },
{ "b64dec", uc_b64dec },
- { "b64enc", uc_b64enc }
+ { "b64enc", uc_b64enc },
+ { "uniq", uc_uniq },
};