diff options
Diffstat (limited to 'sysdep/unix/io.c')
-rw-r--r-- | sysdep/unix/io.c | 126 |
1 files changed, 59 insertions, 67 deletions
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index cbfb47d5..37e26c9b 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -19,6 +19,7 @@ #include <sys/socket.h> #include <sys/uio.h> #include <sys/un.h> +#include <poll.h> #include <unistd.h> #include <fcntl.h> #include <errno.h> @@ -41,12 +42,12 @@ #include "lib/sysio.h" /* Maximum number of calls of tx handler for one socket in one - * select iteration. Should be small enough to not monopolize CPU by + * poll iteration. Should be small enough to not monopolize CPU by * one protocol instance. */ #define MAX_STEPS 4 -/* Maximum number of calls of rx handler for all sockets in one select +/* Maximum number of calls of rx handler for all sockets in one poll iteration. RX callbacks are often much more costly so we limit this to gen small latencies */ #define MAX_RX_STEPS 4 @@ -1023,7 +1024,6 @@ sk_log_error(sock *s, const char *p) static list sock_list; static struct birdsock *current_sock; static struct birdsock *stored_sock; -static int sock_recalc_fdsets_p; static inline sock * sk_next(sock *s) @@ -1079,7 +1079,6 @@ sk_free(resource *r) if (s == stored_sock) stored_sock = sk_next(s); rem_node(&s->n); - sock_recalc_fdsets_p = 1; } } @@ -1277,7 +1276,6 @@ static void sk_insert(sock *s) { add_tail(&sock_list, &s->n); - sock_recalc_fdsets_p = 1; } static void @@ -1329,18 +1327,6 @@ sk_passive_connected(sock *s, int type) log(L_WARN "SOCK: Cannot get remote IP address for TCP<"); } - if (fd >= FD_SETSIZE) - { - /* FIXME: Call err_hook instead ? */ - log(L_ERR "SOCK: Incoming connection from %I%J (port %d) %s", - t->daddr, ipa_is_link_local(t->daddr) ? t->iface : NULL, - t->dport, "rejected due to FD_SETSIZE limit"); - close(fd); - t->fd = -1; - rfree(t); - return 1; - } - if (sk_setup(t) < 0) { /* FIXME: Call err_hook instead ? */ @@ -1416,9 +1402,6 @@ sk_open(sock *s) if (fd < 0) ERR("socket"); - if (fd >= FD_SETSIZE) - ERR2("FD_SETSIZE limit reached"); - s->fd = fd; if (sk_setup(s) < 0) @@ -1696,19 +1679,12 @@ sk_maybe_write(sock *s) int sk_rx_ready(sock *s) { - fd_set rd, wr; - struct timeval timo; int rv; - - FD_ZERO(&rd); - FD_ZERO(&wr); - FD_SET(s->fd, &rd); - - timo.tv_sec = 0; - timo.tv_usec = 0; + struct pollfd pfd = { .fd = s->fd }; + pfd.events |= POLLIN; redo: - rv = select(s->fd+1, &rd, &wr, NULL, &timo); + rv = poll(&pfd, 1, 0); if ((rv < 0) && (errno == EINTR || errno == EAGAIN)) goto redo; @@ -1777,7 +1753,7 @@ sk_send_full(sock *s, unsigned len, struct iface *ifa, /* sk_read() and sk_write() are called from BFD's event loop */ int -sk_read(sock *s) +sk_read(sock *s, int revents) { switch (s->type) { @@ -1796,6 +1772,11 @@ sk_read(sock *s) { if (errno != EINTR && errno != EAGAIN) s->err_hook(s, errno); + else if (errno == EAGAIN && !(revents & POLLIN)) + { + log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents); + s->err_hook(s, 0); + } } else if (!c) s->err_hook(s, 0); @@ -2068,62 +2049,63 @@ static int short_loops = 0; void io_loop(void) { - fd_set rd, wr; - struct timeval timo; + int poll_tout; time_t tout; - int hi, events; + int nfds, events, pout; sock *s; node *n; + int fdmax = 256; + struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd)); watchdog_start1(); - sock_recalc_fdsets_p = 1; for(;;) { events = ev_run_list(&global_event_list); + timers: update_times(); tout = tm_first_shot(); if (tout <= now) { tm_shot(); - continue; + goto timers; } - timo.tv_sec = events ? 0 : MIN(tout - now, 3); - timo.tv_usec = 0; + poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */ io_close_event(); - if (sock_recalc_fdsets_p) - { - sock_recalc_fdsets_p = 0; - FD_ZERO(&rd); - FD_ZERO(&wr); - } - - hi = 0; + nfds = 0; WALK_LIST(n, sock_list) { + pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */ s = SKIP_BACK(sock, n, n); if (s->rx_hook) { - FD_SET(s->fd, &rd); - if (s->fd > hi) - hi = s->fd; + pfd[nfds].fd = s->fd; + pfd[nfds].events |= POLLIN; } - else - FD_CLR(s->fd, &rd); if (s->tx_hook && s->ttx != s->tpos) { - FD_SET(s->fd, &wr); - if (s->fd > hi) - hi = s->fd; + pfd[nfds].fd = s->fd; + pfd[nfds].events |= POLLOUT; + } + if (pfd[nfds].fd != -1) + { + s->index = nfds; + nfds++; } else - FD_CLR(s->fd, &wr); + s->index = -1; + + if (nfds >= fdmax) + { + fdmax *= 2; + pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd)); + } } /* * Yes, this is racy. But even if the signal comes before this test - * and entering select(), it gets caught on the next timer tick. + * and entering poll(), it gets caught on the next timer tick. */ if (async_config_flag) @@ -2148,18 +2130,18 @@ io_loop(void) continue; } - /* And finally enter select() to find active sockets */ + /* And finally enter poll() to find active sockets */ watchdog_stop(); - hi = select(hi+1, &rd, &wr, NULL, &timo); + pout = poll(pfd, nfds, poll_tout); watchdog_start(); - if (hi < 0) + if (pout < 0) { if (errno == EINTR || errno == EAGAIN) continue; - die("select: %m"); + die("poll: %m"); } - if (hi) + if (pout) { /* guaranteed to be non-empty */ current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); @@ -2167,23 +2149,29 @@ io_loop(void) while (current_sock) { sock *s = current_sock; + if (s->index == -1) + { + current_sock = sk_next(s); + goto next; + } + int e; int steps; steps = MAX_STEPS; - if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) + if (s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook) do { steps--; io_log_event(s->rx_hook, s->data); - e = sk_read(s); + e = sk_read(s, pfd[s->index].revents); if (s != current_sock) goto next; } while (e && s->rx_hook && steps); steps = MAX_STEPS; - if (FD_ISSET(s->fd, &wr)) + if (pfd[s->index].revents & POLLOUT) do { steps--; @@ -2210,13 +2198,17 @@ io_loop(void) while (current_sock && count < MAX_RX_STEPS) { sock *s = current_sock; - int e UNUSED; + if (s->index == -1) + { + current_sock = sk_next(s); + goto next2; + } - if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) + if (!s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook) { count++; io_log_event(s->rx_hook, s->data); - e = sk_read(s); + sk_read(s, pfd[s->index].revents); if (s != current_sock) goto next2; } |