summary refs log tree commit diff homepage
path: root/src
diff options
context:
space:
mode:
author Robert James Kaes <rjkaes@users.sourceforge.net> 2002-05-27 01:56:22 +0000
committer Robert James Kaes <rjkaes@users.sourceforge.net> 2002-05-27 01:56:22 +0000
commit b11015c2e16d37258271cccb1e6842e58ace5b90 (patch)
tree d3604a715a25582824abab8d3a4c3167a543dc56 /src
parent 026c7d9a3dc82556bbb40a37598e4032b046eeb9 (diff)
Added a copyright for James E. Flemer since these are his changes.
(filter_init): Added code to handle both hosts and URLs. Also included code to use extended regular expressions. (filter_domain): The old filter_url function has been renamed filter_domain(). (filter_url): This function now actually filters complete URLs.
Diffstat (limited to 'src')
-rw-r--r-- src/filter.c 73
-rw-r--r-- src/filter.h 5
2 files changed, 61 insertions, 17 deletions
diff --git a/src/filter.c b/src/filter.c
index bc1d679..853c4b6 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -1,6 +1,7 @@
-/* $Id: filter.c,v 1.10 2002-05-23 18:20:27 rjkaes Exp $
+/* $Id: filter.c,v 1.11 2002-05-27 01:56:22 rjkaes Exp $
*
* Copyright (c) 1999 George Talusan (gstalusan@uwaterloo.ca)
+ * Copyright (c) 2002 James E. Flemer (jflemer@acm.jhu.edu)
*
* A substring of the domain to be filtered goes into the file
* pointed at by DEFAULT_FILTER.
@@ -21,6 +22,9 @@
#include "filter.h"
#include "heap.h"
#include "regexp.h"
+#include "reqs.h"
+
+#define FILTER_BUFFER_LEN (512)
static int err;
@@ -33,21 +37,28 @@ struct filter_list {
static struct filter_list *fl = NULL;
static int already_init = 0;
-/* initializes a linked list of strings containing hosts to be filtered */
+/*
+ * Initializes a linked list of strings containing hosts/urls to be filtered
+ */
void
filter_init(void)
{
FILE *fd;
struct filter_list *p;
- char buf[255];
- char *s;
+ char buf[FILTER_BUFFER_LEN];
+ char *s, *t;
+ int cflags;
if (!fl && !already_init) {
fd = fopen(config.filter, "r");
if (fd) {
p = NULL;
- while (fgets(buf, 255, fd)) {
+ cflags = REG_NEWLINE | REG_NOSUB;
+ if (config.filter_extended)
+ cflags |= REG_EXTENDED;
+
+ while (fgets(buf, FILTER_BUFFER_LEN, fd)) {
s = buf;
if (!p) /* head of list */
fl = p =
@@ -62,23 +73,38 @@ filter_init(void)
p = p->next;
}
- /* replace first whitespace with \0 */
- while (*s++)
- if (isspace((unsigned char) *s))
- *s = '\0';
+ /* strip trailing whitespace & comments */
+ t = s;
+ while (*s && *s != '#') {
+ if (!isspace((unsigned char)*(s++)))
+ t = s;
+ }
+ *t = '\0';
+
+ /* skip leading whitespace */
+ s = buf;
+ while (*s && isspace((unsigned char)*s))
+ s++;
+
+ /* skip blank lines and comments */
+ if (*s == '\0')
+ continue;
- p->pat = safestrdup(buf);
+ p->pat = safestrdup(s);
p->cpat = safemalloc(sizeof(regex_t));
- if ((err =
- regcomp(p->cpat, p->pat,
- REG_NEWLINE | REG_NOSUB)) != 0) {
+ if ((err = regcomp(p->cpat, p->pat, cflags)) != 0) {
fprintf(stderr, "Bad regex in %s: %s\n",
config.filter, p->pat);
exit(EX_DATAERR);
}
}
- already_init = 1;
+ if (ferror(fd)) {
+ perror("fgets");
+ exit(EX_DATAERR);
+ }
fclose(fd);
+
+ already_init = 1;
}
}
}
@@ -104,7 +130,7 @@ filter_destroy(void)
/* returns 0 if host is not an element of filter list, non-zero otherwise */
int
-filter_url(char *host)
+filter_domain(const char *host)
{
struct filter_list *p;
char *s, *port;
@@ -130,3 +156,20 @@ filter_url(char *host)
safefree(s);
return (result);
}
+
+/* Test a complete URL against every compiled pattern in the filter list: returns 1 on the first pattern that matches, 0 if none match or no filter list is loaded. */
+int
+filter_url(const char *url)
+{
+ struct filter_list *p;
+
+ if (!fl || !already_init) /* no list head, or already_init has not been set by filter_init() */
+ return (0);
+
+ for (p = fl; p; p = p->next) {
+ if (!regexec(p->cpat, url, (size_t) 0, (regmatch_t *) 0, 0)) { /* regexec() returns 0 on a match; no match offsets are requested */
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/src/filter.h b/src/filter.h
index 8e7aff8..4d6364e 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -1,4 +1,4 @@
-/* $Id: filter.h,v 1.3 2000-11-23 04:46:25 rjkaes Exp $
+/* $Id: filter.h,v 1.4 2002-05-27 01:56:22 rjkaes Exp $
*
* See 'filter.c' for a detailed description.
*
@@ -20,6 +20,7 @@
extern void filter_init(void);
extern void filter_destroy(void);
-extern int filter_url(char *host);
+extern int filter_domain(const char *host);
+extern int filter_url(const char *url);
#endif