summary | refs | log | tree | commit | diff | homepage
path: root/lib.c
diff options
context:
space:
mode:
author Jo-Philipp Wich <jo@mein.io> 2022-09-04 20:51:54 +0200
committer Jo-Philipp Wich <jo@mein.io> 2022-09-05 00:21:14 +0200
commit c1f7b3b57a58c4264cffb6332ef1f985d077ae75 (patch)
tree e0f63adbe913a5916a2b7aa782559aad8af4f431 /lib.c
parent 344fa9e69da43ecdc4d8f7768d85d42639352405 (diff)
lib: remove fixed capture group limit in match() and regex replace()
Instead of supporting only up to 10 potential regular expression captures, infer the number of required captures directly from the compiled regexp structure and allocate the match range array dynamically.

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
Diffstat (limited to 'lib.c')
-rw-r--r-- lib.c 112
1 file changed, 57 insertions, 55 deletions
diff --git a/lib.c b/lib.c
index 5d76cc8..100249c 100644
--- a/lib.c
+++ b/lib.c
@@ -1924,7 +1924,7 @@ uc_match(uc_vm_t *vm, size_t nargs)
uc_value_t *subject = uc_fn_arg(0);
uc_value_t *pattern = uc_fn_arg(1);
uc_value_t *rv = NULL, *m;
- regmatch_t pmatch[10];
+ regmatch_t *pmatch = NULL;
int eflags = 0, res;
uc_regexp_t *re;
bool freeable;
@@ -1934,21 +1934,30 @@ uc_match(uc_vm_t *vm, size_t nargs)
if (ucv_type(pattern) != UC_REGEXP || !subject)
return NULL;
- p = uc_cast_string(vm, &subject, &freeable);
re = (uc_regexp_t *)pattern;
+ pmatch = calloc(1 + re->regexp.re_nsub, sizeof(regmatch_t));
+
+ if (!pmatch)
+ return NULL;
+
+ p = uc_cast_string(vm, &subject, &freeable);
+
while (true) {
- res = regexec(&re->regexp, p, ARRAY_SIZE(pmatch), pmatch, eflags);
+ res = regexec(&re->regexp, p, 1 + re->regexp.re_nsub, pmatch, eflags);
if (res == REG_NOMATCH)
break;
m = ucv_array_new(vm);
- for (i = 0; i < ARRAY_SIZE(pmatch) && pmatch[i].rm_so != -1; i++) {
- ucv_array_push(m,
- ucv_string_new_length(p + pmatch[i].rm_so,
- pmatch[i].rm_eo - pmatch[i].rm_so));
+ for (i = 0; i < 1 + re->regexp.re_nsub; i++) {
+ if (pmatch[i].rm_so != -1)
+ ucv_array_push(m,
+ ucv_string_new_length(p + pmatch[i].rm_so,
+ pmatch[i].rm_eo - pmatch[i].rm_so));
+ else
+ ucv_array_push(m, NULL);
}
if (re->global) {
@@ -1972,13 +1981,15 @@ uc_match(uc_vm_t *vm, size_t nargs)
}
}
+ free(pmatch);
+
if (freeable)
free(p);
return rv;
}
-static uc_value_t *
+static void
uc_replace_cb(uc_vm_t *vm, uc_value_t *func,
const char *subject, regmatch_t *pmatch, size_t plen,
uc_stringbuf_t *resbuf)
@@ -1989,22 +2000,22 @@ uc_replace_cb(uc_vm_t *vm, uc_value_t *func,
uc_vm_ctx_push(vm);
uc_vm_stack_push(vm, ucv_get(func));
- for (i = 0; i < plen && pmatch[i].rm_so != -1; i++) {
- uc_vm_stack_push(vm,
- ucv_string_new_length(subject + pmatch[i].rm_so,
- pmatch[i].rm_eo - pmatch[i].rm_so));
+ for (i = 0; i < plen; i++) {
+ if (pmatch[i].rm_so != -1)
+ uc_vm_stack_push(vm,
+ ucv_string_new_length(subject + pmatch[i].rm_so,
+ pmatch[i].rm_eo - pmatch[i].rm_so));
+ else
+ uc_vm_stack_push(vm, NULL);
}
- if (uc_vm_call(vm, true, i))
- return NULL;
-
- rv = uc_vm_stack_pop(vm);
-
- ucv_to_stringbuf(vm, resbuf, rv, false);
+ if (uc_vm_call(vm, true, i) == EXCEPTION_NONE) {
+ rv = uc_vm_stack_pop(vm);
- ucv_put(rv);
+ ucv_to_stringbuf(vm, resbuf, rv, false);
- return NULL;
+ ucv_put(rv);
+ }
}
static void
@@ -2089,44 +2100,45 @@ uc_replace(uc_vm_t *vm, size_t nargs)
uc_value_t *pattern = uc_fn_arg(1);
uc_value_t *replace = uc_fn_arg(2);
bool sb_freeable, pt_freeable;
- uc_value_t *rv = NULL;
+ regmatch_t *pmatch = NULL;
+ uc_regexp_t *re = NULL;
uc_stringbuf_t *resbuf;
- regmatch_t pmatch[10];
int eflags = 0, res;
- uc_regexp_t *re;
- size_t pl;
+ size_t pl, nmatch;
if (!pattern || !subject || !replace)
return NULL;
- sb = uc_cast_string(vm, &subject, &sb_freeable);
- resbuf = ucv_stringbuf_new();
+ nmatch = 1;
if (ucv_type(pattern) == UC_REGEXP) {
re = (uc_regexp_t *)pattern;
+ nmatch += re->regexp.re_nsub;
+ }
+
+ pmatch = calloc(nmatch, sizeof(regmatch_t));
+
+ if (!pmatch)
+ return NULL;
+
+ sb = uc_cast_string(vm, &subject, &sb_freeable);
+ resbuf = ucv_stringbuf_new();
+
+ if (re) {
p = sb;
while (true) {
- res = regexec(&re->regexp, p, ARRAY_SIZE(pmatch), pmatch, eflags);
+ res = regexec(&re->regexp, p, nmatch, pmatch, eflags);
if (res == REG_NOMATCH)
break;
ucv_stringbuf_addstr(resbuf, p, pmatch[0].rm_so);
- if (ucv_is_callable(replace)) {
- rv = uc_replace_cb(vm, replace, p, pmatch, ARRAY_SIZE(pmatch), resbuf);
-
- if (rv) {
- if (sb_freeable)
- free(sb);
-
- return rv;
- }
- }
- else {
- uc_replace_str(vm, replace, p, pmatch, ARRAY_SIZE(pmatch), resbuf);
- }
+ if (ucv_is_callable(replace))
+ uc_replace_cb(vm, replace, p, pmatch, nmatch, resbuf);
+ else
+ uc_replace_str(vm, replace, p, pmatch, nmatch, resbuf);
if (pmatch[0].rm_so != pmatch[0].rm_eo)
p += pmatch[0].rm_eo;
@@ -2156,22 +2168,10 @@ uc_replace(uc_vm_t *vm, size_t nargs)
pmatch[0].rm_so = p - l;
pmatch[0].rm_eo = pmatch[0].rm_so + pl;
- if (ucv_is_callable(replace)) {
- rv = uc_replace_cb(vm, replace, l, pmatch, 1, resbuf);
-
- if (rv) {
- if (sb_freeable)
- free(sb);
-
- if (pt_freeable)
- free(pt);
-
- return rv;
- }
- }
- else {
+ if (ucv_is_callable(replace))
+ uc_replace_cb(vm, replace, l, pmatch, 1, resbuf);
+ else
uc_replace_str(vm, replace, l, pmatch, 1, resbuf);
- }
if (pl) {
l = p + pl;
@@ -2192,6 +2192,8 @@ uc_replace(uc_vm_t *vm, size_t nargs)
free(pt);
}
+ free(pmatch);
+
if (sb_freeable)
free(sb);