diff options
author | Jo-Philipp Wich <jo@mein.io> | 2022-09-04 20:51:54 +0200 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2022-09-05 00:21:14 +0200 |
commit | c1f7b3b57a58c4264cffb6332ef1f985d077ae75 (patch) | |
tree | e0f63adbe913a5916a2b7aa782559aad8af4f431 /lib.c | |
parent | 344fa9e69da43ecdc4d8f7768d85d42639352405 (diff) |
lib: remove fixed capture group limit in match() and regex replace()
Instead of supporting only up to 10 potential regular expression captures,
infer the number of required captures directly from the compiled regexp
structure and allocate the match range array dynamically.
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
Diffstat (limited to 'lib.c')
-rw-r--r-- | lib.c | 112 |
1 files changed, 57 insertions, 55 deletions
@@ -1924,7 +1924,7 @@ uc_match(uc_vm_t *vm, size_t nargs) uc_value_t *subject = uc_fn_arg(0); uc_value_t *pattern = uc_fn_arg(1); uc_value_t *rv = NULL, *m; - regmatch_t pmatch[10]; + regmatch_t *pmatch = NULL; int eflags = 0, res; uc_regexp_t *re; bool freeable; @@ -1934,21 +1934,30 @@ uc_match(uc_vm_t *vm, size_t nargs) if (ucv_type(pattern) != UC_REGEXP || !subject) return NULL; - p = uc_cast_string(vm, &subject, &freeable); re = (uc_regexp_t *)pattern; + pmatch = calloc(1 + re->regexp.re_nsub, sizeof(regmatch_t)); + + if (!pmatch) + return NULL; + + p = uc_cast_string(vm, &subject, &freeable); + while (true) { - res = regexec(&re->regexp, p, ARRAY_SIZE(pmatch), pmatch, eflags); + res = regexec(&re->regexp, p, 1 + re->regexp.re_nsub, pmatch, eflags); if (res == REG_NOMATCH) break; m = ucv_array_new(vm); - for (i = 0; i < ARRAY_SIZE(pmatch) && pmatch[i].rm_so != -1; i++) { - ucv_array_push(m, - ucv_string_new_length(p + pmatch[i].rm_so, - pmatch[i].rm_eo - pmatch[i].rm_so)); + for (i = 0; i < 1 + re->regexp.re_nsub; i++) { + if (pmatch[i].rm_so != -1) + ucv_array_push(m, + ucv_string_new_length(p + pmatch[i].rm_so, + pmatch[i].rm_eo - pmatch[i].rm_so)); + else + ucv_array_push(m, NULL); } if (re->global) { @@ -1972,13 +1981,15 @@ uc_match(uc_vm_t *vm, size_t nargs) } } + free(pmatch); + if (freeable) free(p); return rv; } -static uc_value_t * +static void uc_replace_cb(uc_vm_t *vm, uc_value_t *func, const char *subject, regmatch_t *pmatch, size_t plen, uc_stringbuf_t *resbuf) @@ -1989,22 +2000,22 @@ uc_replace_cb(uc_vm_t *vm, uc_value_t *func, uc_vm_ctx_push(vm); uc_vm_stack_push(vm, ucv_get(func)); - for (i = 0; i < plen && pmatch[i].rm_so != -1; i++) { - uc_vm_stack_push(vm, - ucv_string_new_length(subject + pmatch[i].rm_so, - pmatch[i].rm_eo - pmatch[i].rm_so)); + for (i = 0; i < plen; i++) { + if (pmatch[i].rm_so != -1) + uc_vm_stack_push(vm, + ucv_string_new_length(subject + pmatch[i].rm_so, + pmatch[i].rm_eo - pmatch[i].rm_so)); + else + 
uc_vm_stack_push(vm, NULL); } - if (uc_vm_call(vm, true, i)) - return NULL; - - rv = uc_vm_stack_pop(vm); - - ucv_to_stringbuf(vm, resbuf, rv, false); + if (uc_vm_call(vm, true, i) == EXCEPTION_NONE) { + rv = uc_vm_stack_pop(vm); - ucv_put(rv); + ucv_to_stringbuf(vm, resbuf, rv, false); - return NULL; + ucv_put(rv); + } } static void @@ -2089,44 +2100,45 @@ uc_replace(uc_vm_t *vm, size_t nargs) uc_value_t *pattern = uc_fn_arg(1); uc_value_t *replace = uc_fn_arg(2); bool sb_freeable, pt_freeable; - uc_value_t *rv = NULL; + regmatch_t *pmatch = NULL; + uc_regexp_t *re = NULL; uc_stringbuf_t *resbuf; - regmatch_t pmatch[10]; int eflags = 0, res; - uc_regexp_t *re; - size_t pl; + size_t pl, nmatch; if (!pattern || !subject || !replace) return NULL; - sb = uc_cast_string(vm, &subject, &sb_freeable); - resbuf = ucv_stringbuf_new(); + nmatch = 1; if (ucv_type(pattern) == UC_REGEXP) { re = (uc_regexp_t *)pattern; + nmatch += re->regexp.re_nsub; + } + + pmatch = calloc(nmatch, sizeof(regmatch_t)); + + if (!pmatch) + return NULL; + + sb = uc_cast_string(vm, &subject, &sb_freeable); + resbuf = ucv_stringbuf_new(); + + if (re) { p = sb; while (true) { - res = regexec(&re->regexp, p, ARRAY_SIZE(pmatch), pmatch, eflags); + res = regexec(&re->regexp, p, nmatch, pmatch, eflags); if (res == REG_NOMATCH) break; ucv_stringbuf_addstr(resbuf, p, pmatch[0].rm_so); - if (ucv_is_callable(replace)) { - rv = uc_replace_cb(vm, replace, p, pmatch, ARRAY_SIZE(pmatch), resbuf); - - if (rv) { - if (sb_freeable) - free(sb); - - return rv; - } - } - else { - uc_replace_str(vm, replace, p, pmatch, ARRAY_SIZE(pmatch), resbuf); - } + if (ucv_is_callable(replace)) + uc_replace_cb(vm, replace, p, pmatch, nmatch, resbuf); + else + uc_replace_str(vm, replace, p, pmatch, nmatch, resbuf); if (pmatch[0].rm_so != pmatch[0].rm_eo) p += pmatch[0].rm_eo; @@ -2156,22 +2168,10 @@ uc_replace(uc_vm_t *vm, size_t nargs) pmatch[0].rm_so = p - l; pmatch[0].rm_eo = pmatch[0].rm_so + pl; - if (ucv_is_callable(replace)) 
{ - rv = uc_replace_cb(vm, replace, l, pmatch, 1, resbuf); - - if (rv) { - if (sb_freeable) - free(sb); - - if (pt_freeable) - free(pt); - - return rv; - } - } - else { + if (ucv_is_callable(replace)) + uc_replace_cb(vm, replace, l, pmatch, 1, resbuf); + else uc_replace_str(vm, replace, l, pmatch, 1, resbuf); - } if (pl) { l = p + pl; @@ -2192,6 +2192,8 @@ uc_replace(uc_vm_t *vm, size_t nargs) free(pt); } + free(pmatch); + if (sb_freeable) free(sb); |