summary refs log tree commit diff
path: root/sysdep
diff options
context:
space:
mode:
Diffstat (limited to 'sysdep')
-rw-r--r-- sysdep/bsd/krt-sock.c 5
-rw-r--r-- sysdep/linux/netlink.c 3
-rw-r--r-- sysdep/unix/io.c 126
-rw-r--r-- sysdep/unix/krt.c 46
-rw-r--r-- sysdep/unix/log.c 11
-rw-r--r-- sysdep/unix/main.c 2
6 files changed, 100 insertions, 93 deletions
diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c
index c6456838..56026bdd 100644
--- a/sysdep/bsd/krt-sock.c
+++ b/sysdep/bsd/krt-sock.c
@@ -528,9 +528,8 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan)
e->net = net;
e->u.krt.src = src;
e->u.krt.proto = src2;
-
- /* These are probably too Linux-specific */
- e->u.krt.type = 0;
+ e->u.krt.seen = 0;
+ e->u.krt.best = 0;
e->u.krt.metric = 0;
if (scan)
diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
index 7b1f2dda..8166d5f5 100644
--- a/sysdep/linux/netlink.c
+++ b/sysdep/linux/netlink.c
@@ -1210,7 +1210,8 @@ nl_parse_route(struct nlmsghdr *h, int scan)
e->net = net;
e->u.krt.src = src;
e->u.krt.proto = i->rtm_protocol;
- e->u.krt.type = i->rtm_type;
+ e->u.krt.seen = 0;
+ e->u.krt.best = 0;
e->u.krt.metric = 0;
if (a[RTA_PRIORITY])
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index afa51be7..4db6abb7 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -19,6 +19,7 @@
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
+#include <poll.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
@@ -41,12 +42,12 @@
#include CONFIG_INCLUDE_SYSIO_H
/* Maximum number of calls of tx handler for one socket in one
- * select iteration. Should be small enough to not monopolize CPU by
+ * poll iteration. Should be small enough to not monopolize CPU by
* one protocol instance.
*/
#define MAX_STEPS 4
-/* Maximum number of calls of rx handler for all sockets in one select
+/* Maximum number of calls of rx handler for all sockets in one poll
iteration. RX callbacks are often much more costly so we limit
this to gen small latencies */
#define MAX_RX_STEPS 4
@@ -1023,7 +1024,6 @@ sk_log_error(sock *s, const char *p)
static list sock_list;
static struct birdsock *current_sock;
static struct birdsock *stored_sock;
-static int sock_recalc_fdsets_p;
static inline sock *
sk_next(sock *s)
@@ -1079,7 +1079,6 @@ sk_free(resource *r)
if (s == stored_sock)
stored_sock = sk_next(s);
rem_node(&s->n);
- sock_recalc_fdsets_p = 1;
}
}
@@ -1277,7 +1276,6 @@ static void
sk_insert(sock *s)
{
add_tail(&sock_list, &s->n);
- sock_recalc_fdsets_p = 1;
}
static void
@@ -1329,18 +1327,6 @@ sk_passive_connected(sock *s, int type)
log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
}
- if (fd >= FD_SETSIZE)
- {
- /* FIXME: Call err_hook instead ? */
- log(L_ERR "SOCK: Incoming connection from %I%J (port %d) %s",
- t->daddr, ipa_is_link_local(t->daddr) ? t->iface : NULL,
- t->dport, "rejected due to FD_SETSIZE limit");
- close(fd);
- t->fd = -1;
- rfree(t);
- return 1;
- }
-
if (sk_setup(t) < 0)
{
/* FIXME: Call err_hook instead ? */
@@ -1416,9 +1402,6 @@ sk_open(sock *s)
if (fd < 0)
ERR("socket");
- if (fd >= FD_SETSIZE)
- ERR2("FD_SETSIZE limit reached");
-
s->fd = fd;
if (sk_setup(s) < 0)
@@ -1696,19 +1679,12 @@ sk_maybe_write(sock *s)
int
sk_rx_ready(sock *s)
{
- fd_set rd, wr;
- struct timeval timo;
int rv;
-
- FD_ZERO(&rd);
- FD_ZERO(&wr);
- FD_SET(s->fd, &rd);
-
- timo.tv_sec = 0;
- timo.tv_usec = 0;
+ struct pollfd pfd = { .fd = s->fd };
+ pfd.events |= POLLIN;
redo:
- rv = select(s->fd+1, &rd, &wr, NULL, &timo);
+ rv = poll(&pfd, 1, 0);
if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
goto redo;
@@ -1777,7 +1753,7 @@ sk_send_full(sock *s, unsigned len, struct iface *ifa,
/* sk_read() and sk_write() are called from BFD's event loop */
int
-sk_read(sock *s)
+sk_read(sock *s, int revents)
{
switch (s->type)
{
@@ -1796,6 +1772,11 @@ sk_read(sock *s)
{
if (errno != EINTR && errno != EAGAIN)
s->err_hook(s, errno);
+ else if (errno == EAGAIN && !(revents & POLLIN))
+ {
+ log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
+ s->err_hook(s, 0);
+ }
}
else if (!c)
s->err_hook(s, 0);
@@ -2068,62 +2049,63 @@ static int short_loops = 0;
void
io_loop(void)
{
- fd_set rd, wr;
- struct timeval timo;
+ int poll_tout;
time_t tout;
- int hi, events;
+ int nfds, events, pout;
sock *s;
node *n;
+ int fdmax = 256;
+ struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
watchdog_start1();
- sock_recalc_fdsets_p = 1;
for(;;)
{
events = ev_run_list(&global_event_list);
+ timers:
update_times();
tout = tm_first_shot();
if (tout <= now)
{
tm_shot();
- continue;
+ goto timers;
}
- timo.tv_sec = events ? 0 : MIN(tout - now, 3);
- timo.tv_usec = 0;
+ poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */
io_close_event();
- if (sock_recalc_fdsets_p)
- {
- sock_recalc_fdsets_p = 0;
- FD_ZERO(&rd);
- FD_ZERO(&wr);
- }
-
- hi = 0;
+ nfds = 0;
WALK_LIST(n, sock_list)
{
+ pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
s = SKIP_BACK(sock, n, n);
if (s->rx_hook)
{
- FD_SET(s->fd, &rd);
- if (s->fd > hi)
- hi = s->fd;
+ pfd[nfds].fd = s->fd;
+ pfd[nfds].events |= POLLIN;
}
- else
- FD_CLR(s->fd, &rd);
if (s->tx_hook && s->ttx != s->tpos)
{
- FD_SET(s->fd, &wr);
- if (s->fd > hi)
- hi = s->fd;
+ pfd[nfds].fd = s->fd;
+ pfd[nfds].events |= POLLOUT;
+ }
+ if (pfd[nfds].fd != -1)
+ {
+ s->index = nfds;
+ nfds++;
}
else
- FD_CLR(s->fd, &wr);
+ s->index = -1;
+
+ if (nfds >= fdmax)
+ {
+ fdmax *= 2;
+ pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
+ }
}
/*
* Yes, this is racy. But even if the signal comes before this test
- * and entering select(), it gets caught on the next timer tick.
+ * and entering poll(), it gets caught on the next timer tick.
*/
if (async_config_flag)
@@ -2148,18 +2130,18 @@ io_loop(void)
continue;
}
- /* And finally enter select() to find active sockets */
+ /* And finally enter poll() to find active sockets */
watchdog_stop();
- hi = select(hi+1, &rd, &wr, NULL, &timo);
+ pout = poll(pfd, nfds, poll_tout);
watchdog_start();
- if (hi < 0)
+ if (pout < 0)
{
if (errno == EINTR || errno == EAGAIN)
continue;
- die("select: %m");
+ die("poll: %m");
}
- if (hi)
+ if (pout)
{
/* guaranteed to be non-empty */
current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
@@ -2167,23 +2149,29 @@ io_loop(void)
while (current_sock)
{
sock *s = current_sock;
+ if (s->index == -1)
+ {
+ current_sock = sk_next(s);
+ goto next;
+ }
+
int e;
int steps;
steps = MAX_STEPS;
- if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
+ if (s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
do
{
steps--;
io_log_event(s->rx_hook, s->data);
- e = sk_read(s);
+ e = sk_read(s, pfd[s->index].revents);
if (s != current_sock)
goto next;
}
while (e && s->rx_hook && steps);
steps = MAX_STEPS;
- if (FD_ISSET(s->fd, &wr))
+ if (pfd[s->index].revents & POLLOUT)
do
{
steps--;
@@ -2210,13 +2198,17 @@ io_loop(void)
while (current_sock && count < MAX_RX_STEPS)
{
sock *s = current_sock;
- int e UNUSED;
+ if (s->index == -1)
+ {
+ current_sock = sk_next(s);
+ goto next2;
+ }
- if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
+ if (!s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
{
count++;
io_log_event(s->rx_hook, s->data);
- e = sk_read(s);
+ sk_read(s, pfd[s->index].revents);
if (s != current_sock)
goto next2;
}
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index 7c9bfb24..6531bb28 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -409,46 +409,58 @@ again:
{
rte *e, **ee, *best, **pbest, *old_best;
- old_best = n->routes;
+ /*
+ * Note that old_best may be NULL even if there was an old best route in
+ * the previous step, because it might be replaced in krt_learn_scan().
+ * But in that case there is a new valid best route.
+ */
+
+ old_best = NULL;
best = NULL;
pbest = NULL;
ee = &n->routes;
while (e = *ee)
{
+ if (e->u.krt.best)
+ old_best = e;
+
if (!e->u.krt.seen)
{
*ee = e->next;
rte_free(e);
continue;
}
+
if (!best || best->u.krt.metric > e->u.krt.metric)
{
best = e;
pbest = ee;
}
+
e->u.krt.seen = 0;
+ e->u.krt.best = 0;
ee = &e->next;
}
if (!n->routes)
{
DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen);
if (old_best)
- {
- krt_learn_announce_delete(p, n);
- n->n.flags &= ~KRF_INSTALLED;
- }
+ krt_learn_announce_delete(p, n);
+
FIB_ITERATE_PUT(&fit);
fib_delete(fib, n);
goto again;
}
+
+ best->u.krt.best = 1;
*pbest = best->next;
best->next = n->routes;
n->routes = best;
- if (best != old_best || !(n->n.flags & KRF_INSTALLED) || p->reload)
+
+ if ((best != old_best) || p->reload)
{
DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
krt_learn_announce_update(p, best);
- n->n.flags |= KRF_INSTALLED;
}
else
DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
@@ -507,31 +519,31 @@ krt_learn_async(struct krt_proto *p, rte *e, int new)
best = n->routes;
bestp = &n->routes;
for(gg=&n->routes; g=*gg; gg=&g->next)
+ {
if (best->u.krt.metric > g->u.krt.metric)
{
best = g;
bestp = gg;
}
+
+ g->u.krt.best = 0;
+ }
+
if (best)
{
+ best->u.krt.best = 1;
*bestp = best->next;
best->next = n->routes;
n->routes = best;
}
+
if (best != old_best)
{
DBG("krt_learn_async: distributing change\n");
if (best)
- {
- krt_learn_announce_update(p, best);
- n->n.flags |= KRF_INSTALLED;
- }
+ krt_learn_announce_update(p, best);
else
- {
- n->routes = NULL;
- krt_learn_announce_delete(p, n);
- n->n.flags &= ~KRF_INSTALLED;
- }
+ krt_learn_announce_delete(p, n);
}
}
@@ -556,7 +568,7 @@ krt_dump(struct proto *P)
static void
krt_dump_attrs(rte *e)
{
- debug(" [m=%d,p=%d,t=%d]", e->u.krt.metric, e->u.krt.proto, e->u.krt.type);
+ debug(" [m=%d,p=%d]", e->u.krt.metric, e->u.krt.proto);
}
#endif
diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c
index 8daff034..43d98f7b 100644
--- a/sysdep/unix/log.c
+++ b/sysdep/unix/log.c
@@ -284,17 +284,18 @@ log_switch(int debug, list *l, char *new_syslog_name)
current_log_list = l;
#ifdef HAVE_SYSLOG
- if (current_syslog_name && new_syslog_name &&
- !strcmp(current_syslog_name, new_syslog_name))
+ char *old_syslog_name = current_syslog_name;
+ current_syslog_name = new_syslog_name;
+
+ if (old_syslog_name && new_syslog_name &&
+ !strcmp(old_syslog_name, new_syslog_name))
return;
- if (current_syslog_name)
+ if (old_syslog_name)
closelog();
if (new_syslog_name)
openlog(new_syslog_name, LOG_CONS | LOG_NDELAY, LOG_DAEMON);
-
- current_syslog_name = new_syslog_name;
#endif
}
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index 14b3b2f6..691fee2d 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -450,6 +450,7 @@ cli_connect(sock *s, int size UNUSED)
s->err_hook = cli_err;
s->data = c = cli_new(s);
s->pool = c->pool; /* We need to have all the socket buffers allocated in the cli pool */
+ s->fast_rx = 1;
c->rx_pos = c->rx_buf;
c->rx_aux = NULL;
rmove(s, c->pool);
@@ -466,6 +467,7 @@ cli_init_unix(uid_t use_uid, gid_t use_gid)
s->type = SK_UNIX_PASSIVE;
s->rx_hook = cli_connect;
s->rbsize = 1024;
+ s->fast_rx = 1;
/* Return value intentionally ignored */
unlink(path_control_socket);