diff options
author | Robert James Kaes <rjkaes@users.sourceforge.net> | 2002-05-27 01:56:22 +0000 |
---|---|---|
committer | Robert James Kaes <rjkaes@users.sourceforge.net> | 2002-05-27 01:56:22 +0000 |
commit | b11015c2e16d37258271cccb1e6842e58ace5b90 (patch) | |
tree | d3604a715a25582824abab8d3a4c3167a543dc56 | |
parent | 026c7d9a3dc82556bbb40a37598e4032b046eeb9 (diff) |
Added a copyright for James E. Flemer since these are his changes.
(filter_init): Added code to handle both hosts and URLs. Also included code to use extended regular expressions.
(filter_domain): The old filter_url function has been renamed filter_domain().
(filter_url): This function now actually filters complete URLs.
-rw-r--r-- | src/filter.c | 73 | ||||
-rw-r--r-- | src/filter.h | 5 |
2 files changed, 61 insertions, 17 deletions
diff --git a/src/filter.c b/src/filter.c index bc1d679..853c4b6 100644 --- a/src/filter.c +++ b/src/filter.c @@ -1,6 +1,7 @@ -/* $Id: filter.c,v 1.10 2002-05-23 18:20:27 rjkaes Exp $ +/* $Id: filter.c,v 1.11 2002-05-27 01:56:22 rjkaes Exp $ * * Copyright (c) 1999 George Talusan (gstalusan@uwaterloo.ca) + * Copyright (c) 2002 James E. Flemer (jflemer@acm.jhu.edu) * * A substring of the domain to be filtered goes into the file * pointed at by DEFAULT_FILTER. @@ -21,6 +22,9 @@ #include "filter.h" #include "heap.h" #include "regexp.h" +#include "reqs.h" + +#define FILTER_BUFFER_LEN (512) static int err; @@ -33,21 +37,28 @@ struct filter_list { static struct filter_list *fl = NULL; static int already_init = 0; -/* initializes a linked list of strings containing hosts to be filtered */ +/* + * Initializes a linked list of strings containing hosts/urls to be filtered + */ void filter_init(void) { FILE *fd; struct filter_list *p; - char buf[255]; - char *s; + char buf[FILTER_BUFFER_LEN]; + char *s, *t; + int cflags; if (!fl && !already_init) { fd = fopen(config.filter, "r"); if (fd) { p = NULL; - while (fgets(buf, 255, fd)) { + cflags = REG_NEWLINE | REG_NOSUB; + if (config.filter_extended) + cflags |= REG_EXTENDED; + + while (fgets(buf, FILTER_BUFFER_LEN, fd)) { s = buf; if (!p) /* head of list */ fl = p = @@ -62,23 +73,38 @@ filter_init(void) p = p->next; } - /* replace first whitespace with \0 */ - while (*s++) - if (isspace((unsigned char) *s)) - *s = '\0'; + /* strip trailing whitespace & comments */ + t = s; + while (*s && *s != '#') { + if (!isspace((unsigned char)*(s++))) + t = s; + } + *t = '\0'; + + /* skip leading whitespace */ + s = buf; + while (*s && isspace((unsigned char)*s)) + s++; + + /* skip blank lines and comments */ + if (*s == '\0') + continue; - p->pat = safestrdup(buf); + p->pat = safestrdup(s); p->cpat = safemalloc(sizeof(regex_t)); - if ((err = - regcomp(p->cpat, p->pat, - REG_NEWLINE | REG_NOSUB)) != 0) { + if ((err = regcomp(p->cpat, p->pat, 
cflags)) != 0) { fprintf(stderr, "Bad regex in %s: %s\n", config.filter, p->pat); exit(EX_DATAERR); } } - already_init = 1; + if (ferror(fd)) { + perror("fgets"); + exit(EX_DATAERR); + } fclose(fd); + + already_init = 1; } } } @@ -104,7 +130,7 @@ filter_destroy(void) /* returns 0 if host is not an element of filter list, non-zero otherwise */ int -filter_url(char *host) +filter_domain(const char *host) { struct filter_list *p; char *s, *port; @@ -130,3 +156,20 @@ filter_url(char *host) safefree(s); return (result); } + +/* returns 0 if url is not an element of filter list, non-zero otherwise */ +int +filter_url(const char *url) +{ + struct filter_list *p; + + if (!fl || !already_init) + return (0); + + for (p = fl; p; p = p->next) { + if (!regexec(p->cpat, url, (size_t) 0, (regmatch_t *) 0, 0)) { + return 1; + } + } + return 0; +} diff --git a/src/filter.h b/src/filter.h index 8e7aff8..4d6364e 100644 --- a/src/filter.h +++ b/src/filter.h @@ -1,4 +1,4 @@ -/* $Id: filter.h,v 1.3 2000-11-23 04:46:25 rjkaes Exp $ +/* $Id: filter.h,v 1.4 2002-05-27 01:56:22 rjkaes Exp $ * * See 'filter.c' for a detailed description. * @@ -20,6 +20,7 @@ extern void filter_init(void); extern void filter_destroy(void); -extern int filter_url(char *host); +extern int filter_domain(const char *host); +extern int filter_url(const char *url); #endif |