diff options
author | rofl0r <rofl0r@users.noreply.github.com> | 2020-09-05 19:32:21 +0100 |
---|---|---|
committer | rofl0r <rofl0r@users.noreply.github.com> | 2020-09-05 19:42:34 +0100 |
commit | 233ce6de3b4a23f44c0e8a1acdc76cc98be00358 (patch) | |
tree | fd44a597f56b7be28851b6772c61ab059d4f0b6a | |
parent | c4dc3ba007fe34d6b7704ffb5ec812a436c326e7 (diff) |
filter: reduce memory usage, fix OOM crashes
* check return values of memory allocation and abort gracefully
in out-of-memory situations
* use sblist (linear dynamic array) instead of linked list
- this removes one pointer per filter rule
- removes need to manually allocate/free every single list item
(instead block allocation is used)
- simplifies code
* remove storage of (unused) input rule
- removes one char* pointer per filter rule
- removes storage of the raw bytes of each filter rule
* include the line number in the message shown on out-of-memory or invalid-regex errors
* replace duplicate filter_domain()/filter_host() code with a single
function filter_run()
- reduces code size and management effort
With these improvements, more than 1 million regex rules can be loaded with
4 GB of RAM, whereas previously loading crashed at about 950K rules.
The list used for testing was assembled from
http://www.shallalist.de/Downloads/shallalist.tar.gz
closes #20
-rw-r--r-- | src/filter.c | 93 | ||||
-rw-r--r-- | src/filter.h | 3 | ||||
-rw-r--r-- | src/reqs.c | 4 |
3 files changed, 35 insertions, 65 deletions
diff --git a/src/filter.c b/src/filter.c index 206bf31..8a0b085 100644 --- a/src/filter.c +++ b/src/filter.c @@ -29,18 +29,17 @@ #include "log.h" #include "reqs.h" #include "conf.h" +#include "sblist.h" #define FILTER_BUFFER_LEN (512) static int err; struct filter_list { - struct filter_list *next; - char *pat; - regex_t *cpat; + regex_t cpatb; }; -static struct filter_list *fl = NULL; +static sblist *fl = NULL; static int already_init = 0; static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW; @@ -50,10 +49,10 @@ static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW; void filter_init (void) { FILE *fd; - struct filter_list *p; + struct filter_list fe; char buf[FILTER_BUFFER_LEN]; char *s, *start; - int cflags; + int cflags, lineno = 0; if (fl || already_init) { return; @@ -64,8 +63,6 @@ void filter_init (void) return; } - p = NULL; - cflags = REG_NEWLINE | REG_NOSUB; if (config->filter_extended) cflags |= REG_EXTENDED; @@ -73,6 +70,7 @@ void filter_init (void) cflags |= REG_ICASE; while (fgets (buf, FILTER_BUFFER_LEN, fd)) { + ++lineno; /* skip leading whitespace */ s = buf; while (*s && isspace ((unsigned char) *s)) @@ -104,24 +102,22 @@ void filter_init (void) if (*s == '\0') continue; - if (!p) /* head of list */ - fl = p = - (struct filter_list *) - safecalloc (1, sizeof (struct filter_list)); - else { /* next entry */ - p->next = - (struct filter_list *) - safecalloc (1, sizeof (struct filter_list)); - p = p->next; - } + if (!fl) fl = sblist_new(sizeof(struct filter_list), + 4096/sizeof(struct filter_list)); - p->pat = safestrdup (s); - p->cpat = (regex_t *) safemalloc (sizeof (regex_t)); - err = regcomp (p->cpat, p->pat, cflags); + err = regcomp (&fe.cpatb, s, cflags); if (err != 0) { + if (err == REG_ESPACE) goto oom; + fprintf (stderr, + "Bad regex in %s: line %d - %s\n", + config->filter, lineno, s); + exit (EX_DATAERR); + } + if (!sblist_add(fl, &fe)) { + oom:; fprintf (stderr, - "Bad regex in %s: %s\n", - config->filter, p->pat); + "out of 
memory parsing filter file %s: line %d\n", + config->filter, lineno); exit (EX_DATAERR); } } @@ -137,15 +133,16 @@ void filter_init (void) /* unlink the list */ void filter_destroy (void) { - struct filter_list *p, *q; + struct filter_list *p; + size_t i; if (already_init) { - for (p = q = fl; p; p = q) { - regfree (p->cpat); - safefree (p->cpat); - safefree (p->pat); - q = p->next; - safefree (p); + if (fl) { + for (i = 0; i < sblist_getsize(fl); ++i) { + p = sblist_get(fl, i); + regfree (&p->cpatb); + } + sblist_free(fl); } fl = NULL; already_init = 0; @@ -165,45 +162,19 @@ void filter_reload (void) } /* Return 0 to allow, non-zero to block */ -int filter_domain (const char *host) -{ - struct filter_list *p; - int result; - - if (!fl || !already_init) - goto COMMON_EXIT; - - for (p = fl; p; p = p->next) { - result = - regexec (p->cpat, host, (size_t) 0, (regmatch_t *) 0, 0); - - if (result == 0) { - if (default_policy == FILTER_DEFAULT_ALLOW) - return 1; - else - return 0; - } - } - -COMMON_EXIT: - if (default_policy == FILTER_DEFAULT_ALLOW) - return 0; - else - return 1; -} - -/* returns 0 to allow, non-zero to block */ -int filter_url (const char *url) +int filter_run (const char *str) { struct filter_list *p; + size_t i; int result; if (!fl || !already_init) goto COMMON_EXIT; - for (p = fl; p; p = p->next) { + for (i = 0; i < sblist_getsize(fl); ++i) { + p = sblist_get(fl, i); result = - regexec (p->cpat, url, (size_t) 0, (regmatch_t *) 0, 0); + regexec (&p->cpatb, str, (size_t) 0, (regmatch_t *) 0, 0); if (result == 0) { if (default_policy == FILTER_DEFAULT_ALLOW) diff --git a/src/filter.h b/src/filter.h index 8c6f270..8a7575b 100644 --- a/src/filter.h +++ b/src/filter.h @@ -29,8 +29,7 @@ typedef enum { extern void filter_init (void); extern void filter_destroy (void); extern void filter_reload (void); -extern int filter_domain (const char *host); -extern int filter_url (const char *url); +extern int filter_run (const char *str); extern void 
filter_set_default_policy (filter_policy_t policy); @@ -457,9 +457,9 @@ BAD_REQUEST_ERROR: */ if (config->filter) { if (config->filter_url) - ret = filter_url (url); + ret = filter_run (url); else - ret = filter_domain (request->host); + ret = filter_run (request->host); if (ret) { update_stats (STAT_DENIED); |