summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--README.md60
-rw-r--r--ast.c99
-rw-r--r--ast.h7
-rw-r--r--eval.c2
-rw-r--r--lexer.c165
-rw-r--r--lexer.h2
-rw-r--r--lib.c297
-rw-r--r--parser.y1
8 files changed, 610 insertions, 23 deletions
diff --git a/README.md b/README.md
index 5b947c5..74ea217 100644
--- a/README.md
+++ b/README.md
@@ -770,14 +770,17 @@ If `off` is negative then it starts that far from the end of the array. If
removes the elements from `off` onward except for `-len` elements at the end of
the array. If both `off` and `len` are omitted, removes everything.
-#### 6.34. `split(sep, str)`
+#### 6.34. `split(str, sep)`
-Split the given string using the separator passed as first argument and return
+Split the given string using the separator passed as second argument and return
an array containing the resulting pieces.
+The separator may either be a plain string or a regular expression.
+
```javascript
-split(",", "foo,bar,baz") // ["foo", "bar", "baz"]
-split("", "foobar") // ["f", "o", "o", "b", "a", "r"]
+split("foo,bar,baz", ",") // ["foo", "bar", "baz"]
+split("foobar", "") // ["f", "o", "o", "b", "a", "r"]
+split("foo,bar,baz", /[ao]/) // ["f", "", ",b", "r,b", "z"]
```
#### 6.35. `sqrt(x)`
@@ -889,3 +892,52 @@ Formats the given arguments according to the given format string and returns the
resulting string.
See `printf()` for details.
+
+#### 6.47. `match(str, /pattern/)`
+
+Match the given string against the regular expression pattern specified as
+second argument.
+
+If the passed regular expression uses the `g` flag, the return value will be an
+array of arrays describing all found occurrences within the string.
+
+Without the `g` modifier, an array describing the first match is returned.
+Returns `null` if the pattern was not found within the given string.
+
+```javascript
+match("foobarbaz", /b.(.)/) // ["bar", "r"]
+match("foobarbaz", /b.(.)/g) // [["bar", "r"], ["baz", "z"]]
+```
+
+#### 6.48. `replace(str, /pattern/, replace)`
+
+Replace occurrences of the specified pattern in the string passed as first
+argument. The pattern value may be either a regular expression or a plain
+string. The replace value may be a function which is invoked for each found
+pattern or any other value which is converted into a plain string and used as
+replacement.
+
+If the pattern is a regular expression and not using the `g` flag, then only the
+first occurrence in the string is replaced. If the `g` flag is used or if the
+pattern is not a regular expression, all occurrences are replaced.
+
+If the replace value is a callback function, it is invoked with the found
+substring as first and any capture group values as subsequent parameters.
+
+If the replace value is a string, the following special substrings are
+substituted before it is inserted into the result:
+
+ - `$$` - replaced by a literal `$`
+ - ``$` `` - replaced by the text before the match
+ - `$'` - replaced by the text after the match
+ - `$&` - replaced by the matched substring
+ - `$1`..`$9` - replaced by the value of the corresponding capture group; if the capture group is not defined, it is not substituted
+
+```javascript
+replace("barfoobaz", /(f)(o+)/g, "[$$|$`|$&|$'|$1|$2|$3]") // bar[$|bar|foo|baz|f|oo|$3]baz
+replace("barfoobaz", /(f)(o+)/g, uc) // barFOObaz
+replace("barfoobaz", "a", "X") // bXrfoobXz
+replace("barfoobaz", /(.)(.)(.)/g, function(m, c1, c2, c3) {
+ return c3 + c2 + c1;
+}) // raboofzab
+```
diff --git a/ast.c b/ast.c
index 084bbcb..4e6ea9b 100644
--- a/ast.c
+++ b/ast.c
@@ -24,6 +24,7 @@
#include <stdarg.h>
#include <string.h>
#include <math.h>
+#include <regex.h>
static size_t ut_ext_types_count = 0;
static struct ut_extended_type *ut_ext_types = NULL;
@@ -198,6 +199,104 @@ ut_new_object(struct json_object *proto) {
return op->val;
}
+/*
+ * Cleanup callback handed to json_object_set_serializer() for regexp values.
+ *
+ * `ud` is the op allocated in ut_new_regexp(); the compiled pattern its tag
+ * data points to is released first, then the op allocation itself.
+ */
+static void
+re_free(struct json_object *v, void *ud)
+{
+	struct ut_op *regop = ud;
+	regex_t *compiled = (regex_t *)regop->tag.data;
+
+	regfree(compiled);
+	free(regop);
+}
+
+/*
+ * Serializer callback rendering a regexp value as `/source/flags`.
+ *
+ * With a non-zero `level` the literal is wrapped in double quotes and the
+ * source is emitted in its JSON-escaped form; otherwise the raw source text
+ * is printed. If the temporary string object cannot be created, `...` is
+ * printed in place of the source.
+ *
+ * Returns the result of the final sprintbuf() call.
+ */
+static int
+re_to_string(struct json_object *v, struct printbuf *pb, int level, int flags)
+{
+	struct ut_op *regop = json_object_get_userdata(v);
+	const char *quote = level ? "\"" : "";
+	struct json_object *tmp;
+	const char *cur;
+	size_t remaining;
+
+	sprintbuf(pb, "%s/", quote);
+
+	/* the pattern source is stored right behind the op and the regex_t */
+	tmp = json_object_new_string((char *)regop + sizeof(*regop) + sizeof(regex_t));
+
+	if (!tmp) {
+		sprintbuf(pb, "...");
+	}
+	else if (!level) {
+		sprintbuf(pb, "%s", json_object_get_string(tmp));
+	}
+	else {
+		/* skip the opening quote of the JSON form, stop before the closing one */
+		cur = json_object_to_json_string(tmp) + 1;
+		remaining = strlen(cur) - 1;
+
+		while (remaining > 0) {
+			sprintbuf(pb, "%c", *cur);
+			cur++;
+			remaining--;
+		}
+	}
+
+	json_object_put(tmp);
+
+	return sprintbuf(pb, "/%s%s%s%s",
+		regop->is_reg_global ? "g" : "",
+		regop->is_reg_icase ? "i" : "",
+		regop->is_reg_newline ? "s" : "",
+		quote);
+}
+
+/*
+ * Creates a new regexp value from the given pattern source.
+ *
+ * The op header, the compiled regex_t and a copy of the source text share a
+ * single allocation, which re_free() releases when the returned object is
+ * destroyed.
+ *
+ *  source   pattern text, compiled with REG_EXTENDED
+ *  icase    additionally compile with REG_ICASE
+ *  newline  additionally compile with REG_NEWLINE
+ *  global   only recorded in the op, not passed to regcomp()
+ *  err      if non-NULL, set to a calloc()ed regerror() message when the
+ *           pattern does not compile (ownership passes to the caller) and
+ *           to NULL on every other path
+ *
+ * Returns the new object, or NULL on allocation or compile failure.
+ */
+struct json_object *
+ut_new_regexp(const char *source, bool icase, bool newline, bool global, char **err) {
+	int cflags = REG_EXTENDED, res;
+	struct ut_op *op;
+	regex_t *re;
+	size_t len;
+
+	/* callers test *err after a NULL return, so never leave it indeterminate */
+	if (err)
+		*err = NULL;
+
+	op = calloc(1, sizeof(*op) + sizeof(*re) + strlen(source) + 1);
+
+	if (!op)
+		return NULL;
+
+	re = (regex_t *)((char *)op + sizeof(*op));
+	strcpy((char *)op + sizeof(*op) + sizeof(*re), source);
+
+	if (icase)
+		cflags |= REG_ICASE;
+
+	if (newline)
+		cflags |= REG_NEWLINE;
+
+	op->type = T_REGEXP;
+	op->tag.data = re;
+	op->is_reg_icase = icase;
+	op->is_reg_global = global;
+	op->is_reg_newline = newline;
+
+	res = regcomp(re, source, cflags);
+
+	if (res != 0) {
+		if (err) {
+			/* a first call with an empty buffer yields the required size */
+			len = regerror(res, re, NULL, 0);
+			*err = calloc(1, len);
+
+			if (*err)
+				regerror(res, re, *err, len);
+		}
+
+		/* regcomp() failed, so there is no compiled pattern to regfree() */
+		free(op);
+
+		return NULL;
+	}
+
+	op->val = json_object_new_object();
+
+	if (!op->val) {
+		/* the pattern was compiled successfully and must be released too */
+		regfree(re);
+		free(op);
+
+		return NULL;
+	}
+
+	json_object_set_serializer(op->val, re_to_string, op, re_free);
+
+	return op->val;
+}
+
static int
func_to_string(struct json_object *v, struct printbuf *pb, int level, int flags)
{
diff --git a/ast.h b/ast.h
index 52e22e8..c6d5aeb 100644
--- a/ast.h
+++ b/ast.h
@@ -40,6 +40,7 @@ enum ut_error_type {
UT_ERROR_OVERLONG_STRING,
UT_ERROR_INVALID_ESCAPE,
UT_ERROR_NESTED_BLOCKS,
+ UT_ERROR_INVALID_REGEXP,
UT_ERROR_EXCEPTION
};
@@ -58,6 +59,9 @@ struct ut_op {
uint16_t is_postfix:1;
uint16_t is_for_in:1;
uint16_t is_list:1;
+ uint16_t is_reg_icase:1;
+ uint16_t is_reg_newline:1;
+ uint16_t is_reg_global:1;
uint32_t off;
struct json_object *val;
union {
@@ -84,6 +88,7 @@ struct ut_state {
uint8_t lstrip_blocks:1;
uint8_t strict_declarations:1;
uint8_t skip_shebang:1;
+ uint8_t expect_div:1;
size_t off;
enum ut_block_type blocktype;
struct {
@@ -91,6 +96,7 @@ struct ut_state {
union {
struct json_object *exception;
uint64_t tokens[2];
+ char *regexp_error;
} info;
} error;
struct {
@@ -130,6 +136,7 @@ struct json_object *ut_new_func(struct ut_op *decl);
struct json_object *ut_new_object(struct json_object *proto);
struct json_object *ut_new_double(double v);
struct json_object *ut_new_null(void);
+struct json_object *ut_new_regexp(const char *source, bool icase, bool newline, bool global, char **err);
bool ut_register_extended_type(const char *name, struct json_object *proto, void (*freefn)(void *));
struct json_object *ut_set_extended_type(struct json_object *v, const char *name, void *data);
diff --git a/eval.c b/eval.c
index 7de69e2..d3151e6 100644
--- a/eval.c
+++ b/eval.c
@@ -24,6 +24,7 @@
#include <errno.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <regex.h>
char exception_tag_space[sizeof(struct ut_op) + sizeof(struct ut_op *)];
static struct ut_op *exception_tag = (struct ut_op *)exception_tag_space;
@@ -1549,6 +1550,7 @@ ut_execute_op(struct ut_state *state, uint32_t off)
case T_BOOL:
case T_STRING:
case T_NULL:
+ case T_REGEXP:
return json_object_get(op->val);
case T_THIS:
diff --git a/lexer.c b/lexer.c
index 66ec8a0..455b776 100644
--- a/lexer.c
+++ b/lexer.c
@@ -45,6 +45,7 @@ struct token {
static int parse_comment(const char *, struct ut_op *, struct ut_state *);
static int parse_string(const char *, struct ut_op *, struct ut_state *);
+static int parse_regexp(const char *, struct ut_op *, struct ut_state *);
static int parse_number(const char *, struct ut_op *, struct ut_state *);
static int parse_label(const char *, struct ut_op *, struct ut_state *);
static int parse_bool(const char *, struct ut_op *, struct ut_state *);
@@ -68,7 +69,7 @@ static const struct token tokens[] = {
{ T_ASBAND, "&=", 2 },
{ T_ASBOR, "|=", 2 },
{ T_ASBXOR, "^=", 2 },
- { T_ASDIV, "/=", 2 },
+ //{ T_ASDIV, "/=", 2 },
{ T_ASMOD, "%=", 2 },
{ T_ASMUL, "*=", 2 },
{ T_ASSUB, "-=", 2 },
@@ -100,7 +101,7 @@ static const struct token tokens[] = {
{ T_COLON, ":", 1 },
{ T_COMMA, ",", 1 },
{ T_COMPL, "~", 1 },
- { T_DIV, "/", 1 },
+ //{ T_DIV, "/", 1 },
{ T_GT, ">", 1 },
{ T_NOT, "!", 1 },
{ T_LT, "<", 1 },
@@ -114,6 +115,7 @@ static const struct token tokens[] = {
{ T_DOT, ".", 1 },
{ T_STRING, "'", 1, parse_string },
{ T_STRING, "\"", 1, parse_string },
+ { T_REGEXP, "/", 1, parse_regexp },
{ T_LABEL, "_", 1, parse_label },
{ T_LABEL, "az", 0, parse_label },
{ T_LABEL, "AZ", 0, parse_label },
@@ -209,6 +211,7 @@ const char *tokennames[__T_MAX] = {
[T_NUMBER] = "Number",
[T_DOUBLE] = "Double",
[T_BOOL] = "Bool",
+ [T_REGEXP] = "Regexp",
[T_TEXT] = "Text",
[T_ENDIF] = "'endif'",
[T_ENDFOR] = "'endfor'",
@@ -415,8 +418,6 @@ next:
/* \xFF */
if (in[0] == 'x') {
-
-
if (isxdigit(in[1]) && isxdigit(in[2])) {
if (!utf8enc(&out, &rem, hex(in[1]) * 16 + hex(in[2]))) {
s->off += (in - buf);
@@ -433,6 +434,19 @@ next:
}
}
+ /* \1 .. \9 (regex backreference) */
+ else if (q == '/' && in[0] >= '0' && in[0] <= '9') {
+ /* in regexp mode, retain backslash */
+ if (rem-- < 1) {
+ s->off += (in - buf);
+
+ return -UT_ERROR_OVERLONG_STRING;
+ }
+
+ *out++ = '\\';
+ *out = *in;
+ }
+
/* \377, \77 or \7 */
else if (in[0] >= '0' && in[0] <= '7') {
if (lead_surrogate) {
@@ -518,6 +532,17 @@ next:
case 't': *out = '\t'; break;
case 'v': *out = '\v'; break;
default:
+ /* in regexp mode, retain backslash */
+ if (q == '/') {
+ if (rem-- < 1) {
+ s->off += (in - buf);
+
+ return -UT_ERROR_OVERLONG_STRING;
+ }
+
+ *out++ = '\\';
+ }
+
*out = *in;
break;
}
@@ -574,6 +599,81 @@ next:
/*
+ * Parses a regexp literal from the given buffer.
+ *
+ * When the lexer state expects a division operator (`expect_div` set), the
+ * slash is not treated as a regexp delimiter: the op type becomes T_ASDIV
+ * for `/=` or T_DIV for a lone `/`.
+ *
+ * Otherwise the literal body is read through parse_string(), any trailing
+ * `g`, `i` and `s` flag characters are consumed and the pattern is compiled
+ * by ut_new_regexp(), whose result replaces op->val.
+ *
+ * Returns a negative value on error, otherwise the amount of consumed
+ * characters from the given buffer.
+ *
+ * Error values:
+ *  -UT_ERROR_UNTERMINATED_STRING	Unterminated regexp
+ *  -UT_ERROR_INVALID_ESCAPE		Invalid escape sequence
+ *  -UT_ERROR_OVERLONG_STRING		Regexp literal too long
+ *  -UT_ERROR_INVALID_REGEXP		Could not compile regexp
+ */
+
+static int
+parse_regexp(const char *buf, struct ut_op *op, struct ut_state *s)
+{
+	struct json_object *rv;
+	const char *p;
+	char *err = NULL;
+	bool is_flag;
+	int len;
+
+	if (s->expect_div == 1) {
+		if (!strncmp(buf, "/=", 2)) {
+			op->type = T_ASDIV;
+			return 2;
+		}
+		else {
+			op->type = T_DIV;
+			return 1;
+		}
+	}
+
+	len = parse_string(buf, op, s);
+
+	if (len < 2) {
+		json_object_put(op->val);
+
+		return (len < 0) ? len : -UT_ERROR_UNTERMINATED_STRING;
+	}
+
+	/* The end of input is tested explicitly: strchr() treats the terminating
+	 * NUL as part of the searched string, so it cannot be used to stop the
+	 * scan at the end of the buffer. */
+	for (p = buf + len; *p != '\0'; p++) {
+		is_flag = true;
+
+		switch (*p) {
+		case 'g':
+			op->is_reg_global = 1;
+			break;
+
+		case 'i':
+			op->is_reg_icase = 1;
+			break;
+
+		case 's':
+			op->is_reg_newline = 1;
+			break;
+
+		default:
+			is_flag = false;
+			break;
+		}
+
+		if (!is_flag)
+			break;
+
+		len++;
+	}
+
+	p = json_object_get_string(op->val);
+	rv = ut_new_regexp(p, op->is_reg_icase, op->is_reg_newline, op->is_reg_global, &err);
+
+	json_object_put(op->val);
+	op->val = rv;
+
+	if (!rv) {
+		/* err is NULL unless the failure was a pattern compile error */
+		s->error.info.regexp_error = err;
+
+		return -UT_ERROR_INVALID_REGEXP;
+	}
+
+	return len;
+}
+
+
+/*
* Parses a label from the given buffer.
*
* Returns a negative value on error, otherwise the amount of consumed
@@ -837,8 +937,63 @@ ut_get_token(struct ut_state *s, const char *input, int *mlen)
rv = ut_new_op(s, op.type, op.val, UINT32_MAX);
- if (rv)
+ if (rv) {
s->pool[rv - 1].is_overflow = op.is_overflow;
+ s->pool[rv - 1].is_reg_icase = op.is_reg_icase;
+ s->pool[rv - 1].is_reg_global = op.is_reg_global;
+ s->pool[rv - 1].is_reg_newline = op.is_reg_newline;
+ }
+
+ /* Follow JSLint logic and treat a slash after any of the
+ * `(,=:[!&|?{};` characters as the beginning of a regex
+ * literal... */
+ switch (op.type) {
+ case T_LPAREN:
+ case T_COMMA:
+
+ case T_ASADD:
+ case T_ASBAND:
+ case T_ASBOR:
+ case T_ASBXOR:
+ case T_ASDIV:
+ case T_ASLEFT:
+ case T_ASMOD:
+ case T_ASMUL:
+ case T_ASRIGHT:
+ case T_ASSIGN:
+ case T_ASSUB:
+ case T_EQ:
+ case T_EQS:
+ case T_GE:
+ case T_LE:
+ case T_NE:
+ case T_NES:
+
+ case T_COLON:
+ case T_LBRACK:
+ case T_NOT:
+
+ case T_AND:
+ case T_BAND:
+
+ case T_OR:
+ case T_BOR:
+
+ case T_QMARK:
+
+ case T_LBRACE:
+ case T_RBRACE:
+
+ case T_LSTM:
+ case T_LEXP:
+
+ case T_SCOL:
+ s->expect_div = 0;
+ break;
+
+ default:
+ s->expect_div = 1;
+ }
return rv;
}
diff --git a/lexer.h b/lexer.h
index dd31600..b3911c8 100644
--- a/lexer.h
+++ b/lexer.h
@@ -19,7 +19,7 @@
#include "ast.h"
-#define __T_MAX 78
+#define __T_MAX 79
#define T_EXCEPTION (__T_MAX + 0)
#define T_CFUNC (__T_MAX + 1)
#define T_RESSOURCE (__T_MAX + 2)
diff --git a/lib.c b/lib.c
index 1ca1c6d..5a17d52 100644
--- a/lib.c
+++ b/lib.c
@@ -201,6 +201,13 @@ ut_format_error(struct ut_state *state, const char *expr)
sprintf_append(&msg, &msglen, "\n");
break;
+ case UT_ERROR_INVALID_REGEXP:
+ if (state->error.info.regexp_error)
+ sprintf_append(&msg, &msglen, "Syntax error: %s\n", state->error.info.regexp_error);
+ else
+ sprintf_append(&msg, &msglen, "Runtime error: Out of memory while compiling regexp\n");
+ break;
+
case UT_ERROR_EXCEPTION:
tag = json_object_get_userdata(state->error.info.exception);
off = (tag && tag->tree.operand[0]) ? ut_get_op(state, tag->tree.operand[0])->off : 0;
@@ -1020,35 +1027,62 @@ ut_splice(struct ut_state *s, uint32_t off, struct json_object *args)
static struct json_object *
ut_split(struct ut_state *s, uint32_t off, struct json_object *args)
{
- struct json_object *sep = json_object_array_get_idx(args, 0);
- struct json_object *str = json_object_array_get_idx(args, 1);
+ struct json_object *str = json_object_array_get_idx(args, 0); /* argument 0: the string to split */
+ struct json_object *sep = json_object_array_get_idx(args, 1); /* argument 1: the separator, a regexp or a string */
struct json_object *arr = NULL;
const char *p, *sepstr, *splitstr;
+ int eflags = 0, res;
+ regmatch_t pmatch;
+ struct ut_op *tag;
size_t seplen;
- if (!json_object_is_type(sep, json_type_string) || !json_object_is_type(str, json_type_string))
+ if (!sep || !json_object_is_type(str, json_type_string)) /* no separator given, or the subject is not a string */
return NULL;
arr = json_object_new_array();
+ splitstr = json_object_get_string(str);
if (!arr)
return ut_exception(s, off, UT_ERRMSG_OOM);
- sepstr = json_object_get_string(sep);
- splitstr = json_object_get_string(str);
+ if (ut_is_type(sep, T_REGEXP)) { /* regexp separator: cut at every match of the compiled pattern */
+ tag = json_object_get_userdata(sep);
+
+ while (true) {
+ res = regexec((regex_t *)tag->tag.data, splitstr, 1, &pmatch, eflags);
- for (p = splitstr + (*sepstr ? 1 : 0), seplen = strlen(sepstr); *p; p++) {
- if (!strncmp(p, sepstr, seplen)) {
- if (*sepstr || p > splitstr)
- json_object_array_add(arr, json_object_new_string_len(splitstr, p - splitstr));
+ if (res == REG_NOMATCH) /* NOTE(review): only REG_NOMATCH ends the loop, other regexec() results are not checked */
+ break;
+
+ json_object_array_add(arr, json_object_new_string_len(splitstr, pmatch.rm_so)); /* piece in front of the match */
+
+ splitstr += pmatch.rm_eo; /* continue behind the match; NOTE(review): an empty match at offset 0 makes no progress here, confirm the loop terminates for patterns such as /x*/ */
+ eflags |= REG_NOTBOL; /* the remainder no longer starts at the beginning of the subject */
+ }
- splitstr = p + seplen;
- p = splitstr - (*sepstr ? 1 : 0);
+ json_object_array_add(arr, json_object_new_string(splitstr)); /* trailing remainder, added even when it is empty */
+ }
+ else if (json_object_is_type(sep, json_type_string)) { /* plain string separator */
+ sepstr = json_object_get_string(sep);
+
+ for (p = splitstr + (*sepstr ? 1 : 0), seplen = strlen(sepstr); *p; p++) { /* NOTE(review): with a non-empty separator the scan starts at offset 1, which looks out of bounds for an empty subject - confirm */
+ if (!strncmp(p, sepstr, seplen)) {
+ if (*sepstr || p > splitstr)
+ json_object_array_add(arr, json_object_new_string_len(splitstr, p - splitstr));
+
+ splitstr = p + seplen;
+ p = splitstr - (*sepstr ? 1 : 0);
+ }
}
+
+ if (*splitstr) /* a non-empty remainder becomes the last piece */
+ json_object_array_add(arr, json_object_new_string_len(splitstr, p - splitstr));
}
+ else { /* separator is neither a regexp nor a string */
+ json_object_put(arr);
- if (*splitstr)
- json_object_array_add(arr, json_object_new_string_len(splitstr, p - splitstr));
+ return NULL;
+ }
return arr;
}
@@ -1750,6 +1784,240 @@ ut_arrtoip(struct ut_state *s, uint32_t off, struct json_object *args)
}
}
+/*
+ * match(str, /pattern/)
+ *
+ * Matches the first argument against the regexp passed as second argument.
+ *
+ * Without the `g` flag the result is an array holding the matched text
+ * followed by the capture group values of the first match. With the `g` flag
+ * the result is an array of such arrays, one per match.
+ *
+ * Returns NULL if the pattern is not a regexp, if no subject was given or if
+ * nothing matched. Returns an out-of-memory exception if a result array
+ * cannot be allocated.
+ */
+static struct json_object *
+ut_match(struct ut_state *s, uint32_t off, struct json_object *args)
+{
+	struct json_object *subject = json_object_array_get_idx(args, 0);
+	struct json_object *pattern = json_object_array_get_idx(args, 1);
+	struct json_object *rv = NULL, *m;
+	regmatch_t pmatch[10];
+	struct ut_op *tag;
+	const char *p;
+	int eflags = 0;
+	size_t i;
+
+	if (!ut_is_type(pattern, T_REGEXP) || !subject)
+		return NULL;
+
+	/* only look at the userdata once the value is known to be a regexp */
+	tag = json_object_get_userdata(pattern);
+	p = json_object_get_string(subject);
+
+	/* any non-zero regexec() result means pmatch holds nothing usable */
+	while (regexec((regex_t *)tag->tag.data, p, ARRAY_SIZE(pmatch), pmatch, eflags) == 0) {
+		m = json_object_new_array();
+
+		if (!m) {
+			json_object_put(rv);
+
+			return ut_exception(s, off, UT_ERRMSG_OOM);
+		}
+
+		for (i = 0; i < ARRAY_SIZE(pmatch) && pmatch[i].rm_so != -1; i++) {
+			json_object_array_add(m,
+				json_object_new_string_len(p + pmatch[i].rm_so,
+				                           pmatch[i].rm_eo - pmatch[i].rm_so));
+		}
+
+		/* without the global flag the first match is the result */
+		if (!tag->is_reg_global)
+			return m;
+
+		if (!rv)
+			rv = json_object_new_array();
+
+		if (!rv) {
+			json_object_put(m);
+
+			return ut_exception(s, off, UT_ERRMSG_OOM);
+		}
+
+		json_object_array_add(rv, m);
+
+		p += pmatch[0].rm_eo;
+		eflags |= REG_NOTBOL;
+
+		/* An empty match consumes nothing; step over one character so the
+		 * same position is not matched forever, and stop at the end. */
+		if (pmatch[0].rm_eo == pmatch[0].rm_so) {
+			if (*p == '\0')
+				break;
+
+			p++;
+		}
+	}
+
+	return rv;
+}
+
+/*
+ * Invokes the replace callback `func` for one match and appends its result
+ * to the output buffer described by `sp` / `sl`.
+ *
+ * The callback receives the matched text as first argument, followed by the
+ * capture group values up to the first unset group. `pmatch` offsets are
+ * relative to `subject`, `plen` is the number of entries in `pmatch`.
+ * A NULL callback result is appended as the text "null".
+ *
+ * Returns the exception object if the callback raised one, otherwise NULL.
+ * NULL is also returned, without appending anything, when the argument array
+ * cannot be allocated.
+ */
+static struct json_object *
+ut_replace_cb(struct ut_state *s, uint32_t off, struct json_object *func,
+              const char *subject, regmatch_t *pmatch, size_t plen,
+              char **sp, size_t *sl)
+{
+	struct json_object *cbargs = json_object_new_array();
+	struct json_object *rv;
+	size_t i;
+
+	if (!cbargs)
+		return NULL;
+
+	for (i = 0; i < plen && pmatch[i].rm_so != -1; i++) {
+		json_object_array_add(cbargs,
+			json_object_new_string_len(subject + pmatch[i].rm_so,
+			                           pmatch[i].rm_eo - pmatch[i].rm_so));
+	}
+
+	rv = ut_invoke(s, off, NULL, func, cbargs);
+
+	if (ut_is_type(rv, T_EXCEPTION)) {
+		/* the argument array is released on the error path as well */
+		json_object_put(cbargs);
+
+		return rv;
+	}
+
+	sprintf_append(sp, sl, "%s", rv ? json_object_get_string(rv) : "null");
+
+	json_object_put(cbargs);
+	json_object_put(rv);
+
+	return NULL;
+}
+
+/*
+ * Expands the replacement value `str` for one match and appends the result
+ * to the output buffer described by `sp` / `sl`.
+ *
+ * A NULL `str` is treated as the text "null". `pmatch` offsets are relative
+ * to `subject`, `plen` is the number of entries in `pmatch`.
+ *
+ * Recognized substitutions:
+ *   $$        a literal `$`
+ *   $&        the matched text
+ *   $`        the text in front of the match
+ *   $'        the text behind the match
+ *   $1 .. $9  the corresponding capture group; kept verbatim if the group is
+ *             out of range or unset
+ *
+ * Any other `$x` sequence is copied verbatim, as is a lone `$` at the very
+ * end of the replacement text.
+ */
+static void
+ut_replace_str(struct ut_state *s, uint32_t off, struct json_object *str,
+               const char *subject, regmatch_t *pmatch, size_t plen,
+               char **sp, size_t *sl)
+{
+	const char *r = str ? json_object_get_string(str) : "null";
+	const char *p;
+	bool esc = false;
+	size_t group;
+
+	for (p = r; *p; p++) {
+		if (esc) {
+			switch (*p) {
+			case '&':
+				if (pmatch[0].rm_so != -1)
+					snprintf_append(sp, sl, "%s", pmatch[0].rm_eo - pmatch[0].rm_so,
+					                subject + pmatch[0].rm_so);
+				break;
+
+			case '`':
+				if (pmatch[0].rm_so != -1)
+					snprintf_append(sp, sl, "%s", pmatch[0].rm_so, subject);
+				break;
+
+			case '\'':
+				if (pmatch[0].rm_so != -1)
+					sprintf_append(sp, sl, "%s", subject + pmatch[0].rm_eo);
+				break;
+
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				group = (size_t)(*p - '0');
+
+				if (group < plen && pmatch[group].rm_so != -1)
+					snprintf_append(sp, sl, "%s", pmatch[group].rm_eo - pmatch[group].rm_so,
+					                subject + pmatch[group].rm_so);
+				else
+					sprintf_append(sp, sl, "$%c", *p);
+				break;
+
+			case '$':
+				sprintf_append(sp, sl, "$");
+				break;
+
+			default:
+				sprintf_append(sp, sl, "$%c", *p);
+			}
+
+			esc = false;
+		}
+		else if (*p == '$') {
+			esc = true;
+		}
+		else {
+			sprintf_append(sp, sl, "%c", *p);
+		}
+	}
+
+	/* a `$` that ended the replacement text introduces nothing, keep it */
+	if (esc)
+		sprintf_append(sp, sl, "$");
+}
+
+/*
+ * replace(str, pattern, replace)
+ *
+ * Builds a copy of the first argument in which occurrences of `pattern` are
+ * substituted. The replace value is either a function, which is invoked per
+ * match through ut_replace_cb(), or any other value, which is expanded
+ * through ut_replace_str().
+ *
+ * A regexp pattern replaces only the first match unless it carries the `g`
+ * flag. Any other pattern is used as a plain string and every occurrence is
+ * replaced; an empty plain pattern inserts the replacement in front of every
+ * character.
+ *
+ * Returns the resulting string, NULL if one of the three arguments is
+ * missing, or the exception raised by a replace callback.
+ */
+static struct json_object *
+ut_replace(struct ut_state *s, uint32_t off, struct json_object *args)
+{
+	struct json_object *subject = json_object_array_get_idx(args, 0);
+	struct json_object *pattern = json_object_array_get_idx(args, 1);
+	struct json_object *replace = json_object_array_get_idx(args, 2);
+	struct json_object *rv = NULL;
+	const char *sb, *p, *l;
+	regmatch_t pmatch[10];
+	struct ut_op *tag;
+	bool is_cb, empty;
+	int eflags = 0;
+	size_t sl = 0, pl;
+	char *sp = NULL;
+
+	if (!pattern || !subject || !replace)
+		return NULL;
+
+	is_cb = ut_is_type(replace, T_FUNC) || ut_is_type(replace, T_CFUNC);
+
+	if (ut_is_type(pattern, T_REGEXP)) {
+		tag = json_object_get_userdata(pattern);
+		p = json_object_get_string(subject);
+
+		/* any non-zero regexec() result means pmatch holds nothing usable */
+		while (regexec((regex_t *)tag->tag.data, p, ARRAY_SIZE(pmatch), pmatch, eflags) == 0) {
+			/* text between the previous match and this one */
+			snprintf_append(&sp, &sl, "%s", pmatch[0].rm_so, p);
+
+			if (is_cb) {
+				rv = ut_replace_cb(s, off, replace, p, pmatch, ARRAY_SIZE(pmatch), &sp, &sl);
+
+				if (rv) {
+					/* drop the partial result, not the interpreter state */
+					free(sp);
+
+					return rv;
+				}
+			}
+			else {
+				ut_replace_str(s, off, replace, p, pmatch, ARRAY_SIZE(pmatch), &sp, &sl);
+			}
+
+			empty = (pmatch[0].rm_eo == pmatch[0].rm_so);
+			p += pmatch[0].rm_eo;
+
+			if (!tag->is_reg_global)
+				break;
+
+			eflags |= REG_NOTBOL;
+
+			/* An empty match consumes nothing; copy one character and step
+			 * over it so the same position is not matched forever. */
+			if (empty) {
+				if (*p == '\0')
+					break;
+
+				sprintf_append(&sp, &sl, "%c", *p);
+				p++;
+			}
+		}
+
+		sprintf_append(&sp, &sl, "%s", p);
+	}
+	else {
+		sb = json_object_get_string(subject);
+		p = json_object_get_string(pattern);
+		pl = strlen(p);
+
+		for (l = sb; *sb; sb++) {
+			if (strncmp(sb, p, pl) != 0)
+				continue;
+
+			/* text between the previous occurrence and this one */
+			snprintf_append(&sp, &sl, "%s", sb - l, l);
+
+			pmatch[0].rm_so = sb - l;
+			pmatch[0].rm_eo = pmatch[0].rm_so + pl;
+
+			if (is_cb) {
+				rv = ut_replace_cb(s, off, replace, l, pmatch, 1, &sp, &sl);
+
+				if (rv) {
+					/* drop the partial result, not the interpreter state */
+					free(sp);
+
+					return rv;
+				}
+			}
+			else {
+				ut_replace_str(s, off, replace, l, pmatch, 1, &sp, &sl);
+			}
+
+			l = sb + pl;
+
+			/* the loop increment supplies the last step; an empty pattern
+			 * must not move the cursor backwards */
+			if (pl > 0)
+				sb += pl - 1;
+		}
+
+		sprintf_append(&sp, &sl, "%s", l);
+	}
+
+	rv = json_object_new_string_len(sp ? sp : "", sl);
+	free(sp);
+
+	return rv;
+}
+
const struct ut_ops ut = {
.register_function = ut_register_function,
.register_type = ut_register_extended_type,
@@ -1802,8 +2070,11 @@ static const struct { const char *name; ut_c_fn *func; } functions[] = {
{ "require", ut_require },
{ "iptoarr", ut_iptoarr },
{ "arrtoip", ut_arrtoip },
+ { "match", ut_match },
+ { "replace", ut_replace },
};
+
void
ut_lib_init(struct ut_state *state, struct json_object *scope)
{
diff --git a/parser.y b/parser.y
index 938a341..ff5153e 100644
--- a/parser.y
+++ b/parser.y
@@ -295,6 +295,7 @@ primary_exp(A) ::= T_NUMBER(B). { A = B; }
primary_exp(A) ::= T_DOUBLE(B). { A = B; }
primary_exp(A) ::= T_STRING(B). { A = B; }
primary_exp(A) ::= T_LABEL(B). { A = B; }
+primary_exp(A) ::= T_REGEXP(B). { A = B; }
primary_exp(A) ::= T_NULL(B). { A = B; }
primary_exp(A) ::= T_THIS(B). { A = B; }
primary_exp(A) ::= array(B). { A = B; }