diff options
-rw-r--r-- | README.md | 14 | ||||
-rw-r--r-- | lib.c | 31 | ||||
-rw-r--r-- | tests/custom/03_stdlib/18_split | 16 | ||||
-rw-r--r-- | tests/custom/03_stdlib/33_replace | 34 |
4 files changed, 86 insertions, 9 deletions
@@ -934,17 +934,22 @@ If `off` is negative then it starts that far from the end of the array. If removes the elements from `off` onward except for `-len` elements at the end of the array. If both `off` and `len` are omitted, removes everything. -#### 6.33. `split(str, sep)` +#### 6.33. `split(str, sep[, limit])` Split the given string using the separator passed as second argument and return an array containing the resulting pieces. +If a limit argument is supplied, the resulting array contains no more than the +given number of entries, meaning the string is split at most `limit - 1` +times in total. + The separator may either be a plain string or a regular expression. ```javascript split("foo,bar,baz", ",") // ["foo", "bar", "baz"] split("foobar", "") // ["f", "o", "o", "b", "a", "r"] split("foo,bar,baz", /[ao]/) // ["f", "", ",b", "r,b", "z"] +split("foo=bar=baz", "=", 2) // ["foo", "bar=baz"] ``` #### 6.34. `sqrt(x)` @@ -1095,7 +1100,7 @@ match("foobarbaz", /b.(.)/) // ["bar", "r"] match("foobarbaz", /b.(.)/g) // [["bar", "r"], ["baz", "z"]] ``` -#### 6.47. `replace(str, /pattern/, replace)` +#### 6.47. `replace(str, /pattern/, replace[, limit])` Replace occurences of the specified pattern in the string passed as first argument. The pattern value may be either a regular expression or a plain @@ -1103,6 +1108,9 @@ string. The replace value may be a function which is invoked for each found pattern or any other value which is converted into a plain string and used as replacement. +When an optional limit is specified, substitutions are performed at most that +many times. + If the pattern is a regular expression and not using the `g` flag, then only the first occurence in the string is replaced, if the `g` flag is used or if the pattern is not a regular expression, all occurrences are replaced. 
@@ -1126,6 +1134,8 @@ replace("barfoobaz", "a", "X") // bXrfoobXz replace("barfoobaz", /(.)(.)(.)/g, function(m, c1, c2, c3) { return c3 + c2 + c1; }) // raboofzab +replace("aaaaa", "a", "x", 3) // xxxaa +replace("foo bar baz", /[ao]/g, "x", 3) // fxx bxr baz ``` #### 6.48. `json(str)` @@ -1023,9 +1023,10 @@ uc_split(uc_vm_t *vm, size_t nargs) { uc_value_t *str = uc_fn_arg(0); uc_value_t *sep = uc_fn_arg(1); + uc_value_t *lim = uc_fn_arg(2); uc_value_t *arr = NULL; const char *p, *sepstr, *splitstr; - size_t seplen, splitlen; + size_t seplen, splitlen, limit; int eflags = 0, res; regmatch_t pmatch; uc_regexp_t *re; @@ -1036,11 +1037,15 @@ uc_split(uc_vm_t *vm, size_t nargs) arr = ucv_array_new(vm); splitlen = ucv_string_length(str); p = splitstr = ucv_string_get(str); + limit = lim ? ucv_uint64_get(lim) : SIZE_MAX; + + if (limit == 0) + goto out; if (ucv_type(sep) == UC_REGEXP) { re = (uc_regexp_t *)sep; - while (true) { + while (limit > 1) { res = regexec(&re->regexp, splitstr, 1, &pmatch, eflags); if (res == REG_NOMATCH) @@ -1059,6 +1064,7 @@ uc_split(uc_vm_t *vm, size_t nargs) } eflags |= REG_NOTBOL; + limit--; } ucv_array_push(arr, ucv_string_new(splitstr)); @@ -1071,20 +1077,25 @@ uc_split(uc_vm_t *vm, size_t nargs) ucv_array_push(arr, ucv_string_new_length("", 0)); } else if (seplen == 0) { - while (splitlen > 0) { + while (limit > 1 && splitlen > 0) { ucv_array_push(arr, ucv_string_new_length(p, 1)); + limit--; splitlen--; p++; } + + if (splitlen > 0) + ucv_array_push(arr, ucv_string_new_length(p, splitlen)); } else { - while (splitlen >= seplen) { + while (limit > 1 && splitlen >= seplen) { if (!memcmp(p, sepstr, seplen)) { ucv_array_push(arr, ucv_string_new_length(splitstr, p - splitstr)); p = splitstr = p + seplen; splitlen -= seplen; + limit--; continue; } @@ -2099,12 +2110,13 @@ uc_replace(uc_vm_t *vm, size_t nargs) uc_value_t *subject = uc_fn_arg(0); uc_value_t *pattern = uc_fn_arg(1); uc_value_t *replace = uc_fn_arg(2); + uc_value_t *limitval = 
uc_fn_arg(3); bool sb_freeable, pt_freeable; regmatch_t *pmatch = NULL; + size_t pl, nmatch, limit; uc_regexp_t *re = NULL; uc_stringbuf_t *resbuf; int eflags = 0, res; - size_t pl, nmatch; if (!pattern || !subject || !replace) return NULL; @@ -2123,11 +2135,12 @@ uc_replace(uc_vm_t *vm, size_t nargs) sb = uc_cast_string(vm, &subject, &sb_freeable); resbuf = ucv_stringbuf_new(); + limit = limitval ? ucv_uint64_get(limitval) : SIZE_MAX; if (re) { p = sb; - while (true) { + while (limit > 0) { res = regexec(&re->regexp, p, nmatch, pmatch, eflags); if (res == REG_NOMATCH) @@ -2151,6 +2164,8 @@ uc_replace(uc_vm_t *vm, size_t nargs) eflags |= REG_NOTBOL; else break; + + limit--; } ucv_stringbuf_addstr(resbuf, p, strlen(p)); @@ -2161,7 +2176,7 @@ uc_replace(uc_vm_t *vm, size_t nargs) l = p = sb; - while (true) { + while (limit > 0) { if (pl == 0 || !strncmp(p, pt, pl)) { ucv_stringbuf_addstr(resbuf, l, p - l); @@ -2180,6 +2195,8 @@ uc_replace(uc_vm_t *vm, size_t nargs) else { l = p; } + + limit--; } if (!*p++) diff --git a/tests/custom/03_stdlib/18_split b/tests/custom/03_stdlib/18_split index 5ee35a2..20d5c8d 100644 --- a/tests/custom/03_stdlib/18_split +++ b/tests/custom/03_stdlib/18_split @@ -43,6 +43,18 @@ argument is neither a string nor a regular expression. // subject and split strings handle embedded \0 split("foo=1\0bar=2\0baz=3", "\0"), + + // supplying a limit only splits the string into that many parts + split("foo=1=2=3", "=", 2), + + // limit of one produces a result array containing the entire string as sole item + split("foo=1=2=3", "=", 1), + + // negative limit yields an empty result array + split("foo=1=2=3", "=", -1), + + // zero limit yields an empty result array + split("foo=1=2=3", "=", 0), ]), "\n"); %} -- End -- @@ -62,6 +74,10 @@ argument is neither a string nor a regular expression. 
[ "", "abc", "def", "" ] [ "", "foo", "bar", "" ] [ "foo=1", "bar=2", "baz=3" ] +[ "foo", "1=2=3" ] +[ "foo=1=2=3" ] +[ ] +[ ] -- End -- diff --git a/tests/custom/03_stdlib/33_replace b/tests/custom/03_stdlib/33_replace index b662ae8..388959f 100644 --- a/tests/custom/03_stdlib/33_replace +++ b/tests/custom/03_stdlib/33_replace @@ -205,3 +205,37 @@ In [anonymous function](), line 2, byte 40: -- End -- + + +An optional limit parameter controls the maximum amount of replacements. + +-- Testcase -- +{% + printf("%.J\n", [ + // negative limit performs no substitution + replace("aaaaa", "a", "x", -1), + + // zero limit performs no substitution + replace("aaaaa", "a", "x", 0), + + // positive limit + replace("aaaaa", "a", "x", 3), + + // same rules apply to regex replaces: + replace("foo bar baz", /[ao]/g, "x", -1), + replace("foo bar baz", /[ao]/g, "x", 0), + replace("foo bar baz", /[ao]/g, "x", 3), + ]); +%} +-- End -- + +-- Expect stdout -- +[ + "aaaaa", + "aaaaa", + "xxxaa", + "foo bar baz", + "foo bar baz", + "fxx bxr baz" +] +-- End -- |