summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: rofl0r <rofl0r@users.noreply.github.com> 2020-09-05 19:32:21 +0100
committer: rofl0r <rofl0r@users.noreply.github.com> 2020-09-05 19:42:34 +0100
commit: 233ce6de3b4a23f44c0e8a1acdc76cc98be00358 (patch)
tree: fd44a597f56b7be28851b6772c61ab059d4f0b6a
parent: c4dc3ba007fe34d6b7704ffb5ec812a436c326e7 (diff)
filter: reduce memory usage, fix OOM crashes
* check return values of memory allocation and abort gracefully in
  out-of-memory situations
* use sblist (linear dynamic array) instead of linked list
  - this removes one pointer per filter rule
  - removes need to manually allocate/free every single list item
    (instead block allocation is used)
  - simplifies code
* remove storage of (unused) input rule
  - removes one char* pointer per filter rule
  - removes storage of the raw bytes of each filter rule
* add line number to display on out-of-memory/invalid regex situation
* replace duplicate filter_domain()/filter_url() code with a single
  function filter_run()
  - reduces code size and management effort

with these improvements, >1 million regex rules can be loaded with 4 GB of RAM, whereas previously it crashed with about 950K.

the list for testing was assembled from http://www.shallalist.de/Downloads/shallalist.tar.gz

closes #20
-rw-r--r-- src/filter.c 93
-rw-r--r-- src/filter.h 3
-rw-r--r-- src/reqs.c 4
3 files changed, 35 insertions, 65 deletions
diff --git a/src/filter.c b/src/filter.c
index 206bf31..8a0b085 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -29,18 +29,17 @@
#include "log.h"
#include "reqs.h"
#include "conf.h"
+#include "sblist.h"
#define FILTER_BUFFER_LEN (512)
static int err;
struct filter_list {
- struct filter_list *next;
- char *pat;
- regex_t *cpat;
+ regex_t cpatb;
};
-static struct filter_list *fl = NULL;
+static sblist *fl = NULL;
static int already_init = 0;
static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW;
@@ -50,10 +49,10 @@ static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW;
void filter_init (void)
{
FILE *fd;
- struct filter_list *p;
+ struct filter_list fe;
char buf[FILTER_BUFFER_LEN];
char *s, *start;
- int cflags;
+ int cflags, lineno = 0;
if (fl || already_init) {
return;
@@ -64,8 +63,6 @@ void filter_init (void)
return;
}
- p = NULL;
-
cflags = REG_NEWLINE | REG_NOSUB;
if (config->filter_extended)
cflags |= REG_EXTENDED;
@@ -73,6 +70,7 @@ void filter_init (void)
cflags |= REG_ICASE;
while (fgets (buf, FILTER_BUFFER_LEN, fd)) {
+ ++lineno;
/* skip leading whitespace */
s = buf;
while (*s && isspace ((unsigned char) *s))
@@ -104,24 +102,22 @@ void filter_init (void)
if (*s == '\0')
continue;
- if (!p) /* head of list */
- fl = p =
- (struct filter_list *)
- safecalloc (1, sizeof (struct filter_list));
- else { /* next entry */
- p->next =
- (struct filter_list *)
- safecalloc (1, sizeof (struct filter_list));
- p = p->next;
- }
+ if (!fl) fl = sblist_new(sizeof(struct filter_list),
+ 4096/sizeof(struct filter_list));
- p->pat = safestrdup (s);
- p->cpat = (regex_t *) safemalloc (sizeof (regex_t));
- err = regcomp (p->cpat, p->pat, cflags);
+ err = regcomp (&fe.cpatb, s, cflags);
if (err != 0) {
+ if (err == REG_ESPACE) goto oom;
+ fprintf (stderr,
+ "Bad regex in %s: line %d - %s\n",
+ config->filter, lineno, s);
+ exit (EX_DATAERR);
+ }
+ if (!sblist_add(fl, &fe)) {
+ oom:;
fprintf (stderr,
- "Bad regex in %s: %s\n",
- config->filter, p->pat);
+ "out of memory parsing filter file %s: line %d\n",
+ config->filter, lineno);
exit (EX_DATAERR);
}
}
@@ -137,15 +133,16 @@ void filter_init (void)
/* unlink the list */
void filter_destroy (void)
{
- struct filter_list *p, *q;
+ struct filter_list *p;
+ size_t i;
if (already_init) {
- for (p = q = fl; p; p = q) {
- regfree (p->cpat);
- safefree (p->cpat);
- safefree (p->pat);
- q = p->next;
- safefree (p);
+ if (fl) {
+ for (i = 0; i < sblist_getsize(fl); ++i) {
+ p = sblist_get(fl, i);
+ regfree (&p->cpatb);
+ }
+ sblist_free(fl);
}
fl = NULL;
already_init = 0;
@@ -165,45 +162,19 @@ void filter_reload (void)
}
/* Return 0 to allow, non-zero to block */
-int filter_domain (const char *host)
-{
- struct filter_list *p;
- int result;
-
- if (!fl || !already_init)
- goto COMMON_EXIT;
-
- for (p = fl; p; p = p->next) {
- result =
- regexec (p->cpat, host, (size_t) 0, (regmatch_t *) 0, 0);
-
- if (result == 0) {
- if (default_policy == FILTER_DEFAULT_ALLOW)
- return 1;
- else
- return 0;
- }
- }
-
-COMMON_EXIT:
- if (default_policy == FILTER_DEFAULT_ALLOW)
- return 0;
- else
- return 1;
-}
-
-/* returns 0 to allow, non-zero to block */
-int filter_url (const char *url)
+int filter_run (const char *str)
{
struct filter_list *p;
+ size_t i;
int result;
if (!fl || !already_init)
goto COMMON_EXIT;
- for (p = fl; p; p = p->next) {
+ for (i = 0; i < sblist_getsize(fl); ++i) {
+ p = sblist_get(fl, i);
result =
- regexec (p->cpat, url, (size_t) 0, (regmatch_t *) 0, 0);
+ regexec (&p->cpatb, str, (size_t) 0, (regmatch_t *) 0, 0);
if (result == 0) {
if (default_policy == FILTER_DEFAULT_ALLOW)
diff --git a/src/filter.h b/src/filter.h
index 8c6f270..8a7575b 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -29,8 +29,7 @@ typedef enum {
extern void filter_init (void);
extern void filter_destroy (void);
extern void filter_reload (void);
-extern int filter_domain (const char *host);
-extern int filter_url (const char *url);
+extern int filter_run (const char *str);
extern void filter_set_default_policy (filter_policy_t policy);
diff --git a/src/reqs.c b/src/reqs.c
index 859ac6b..c58bbed 100644
--- a/src/reqs.c
+++ b/src/reqs.c
@@ -457,9 +457,9 @@ BAD_REQUEST_ERROR:
*/
if (config->filter) {
if (config->filter_url)
- ret = filter_url (url);
+ ret = filter_run (url);
else
- ret = filter_domain (request->host);
+ ret = filter_run (request->host);
if (ret) {
update_stats (STAT_DENIED);