/* * BIRD Internet Routing Daemon -- Unix I/O * * (c) 1998--1999 Martin Mares <mj@ucw.cz> * * Can be freely distributed and used under the terms of the GNU GPL. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/time.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/fcntl.h> #include <unistd.h> #include <errno.h> #ifndef HAVE_STRUCT_IP_MREQN #include <net/if.h> #endif #include "nest/bird.h" #include "lib/lists.h" #include "lib/resource.h" #include "lib/timer.h" #include "lib/socket.h" #include "lib/event.h" #include "nest/iface.h" #include "lib/unix.h" /* * Timers */ #define NEAR_TIMER_LIMIT 4 #ifdef TIME_T_IS_64BIT #define TIME_INFINITY 0x7fffffffffffffff #else #ifdef TIME_T_IS_SIGNED #define TIME_INFINITY 0x7fffffff #else #define TIME_INFINITY 0xffffffff #endif #endif static list near_timers, far_timers; static bird_clock_t first_far_timer = TIME_INFINITY; bird_clock_t now; static void tm_free(resource *r) { timer *t = (timer *) r; tm_stop(t); } static void tm_dump(resource *r) { timer *t = (timer *) r; debug("(code %p, data %p, ", t->hook, t->data); if (t->randomize) debug("rand %d, ", t->randomize); if (t->recurrent) debug("recur %d, ", t->recurrent); if (t->expires) debug("expires in %d sec)\n", t->expires - now); else debug("inactive)\n"); } static struct resclass tm_class = { "Timer", sizeof(timer), tm_free, tm_dump }; timer * tm_new(pool *p) { timer *t = ralloc(p, &tm_class); t->hook = NULL; t->data = NULL; t->randomize = 0; t->expires = 0; return t; } static inline void tm_insert_near(timer *t) { node *n = HEAD(near_timers); while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires)) n = n->next; insert_node(&t->n, n->prev); } void tm_start(timer *t, unsigned after) { bird_clock_t when; if (t->randomize) after += random() % (t->randomize + 1); when = now + after; if (t->expires == when) return; if (t->expires) rem_node(&t->n); t->expires = when; if (after <= NEAR_TIMER_LIMIT) tm_insert_near(t); else { if (!first_far_timer || first_far_timer > when) first_far_timer = when; add_tail(&far_timers, &t->n); } } void tm_stop(timer *t) { if (t->expires) { rem_node(&t->n); t->expires = 0; } } static void tm_dump_them(char *name, list *l) { node *n; timer *t; debug("%s timers:\n", name); WALK_LIST(n, *l) { t = SKIP_BACK(timer, n, n); debug("%p ", t); tm_dump(&t->r); } debug("\n"); } void tm_dump_all(void) { tm_dump_them("Near", &near_timers); tm_dump_them("Far", &far_timers); } static inline time_t tm_first_shot(void) { time_t x = first_far_timer; if (!EMPTY_LIST(near_timers)) { timer *t = SKIP_BACK(timer, n, HEAD(near_timers)); if (t->expires < x) x = t->expires; } return x; } static void tm_shot(void) { timer *t; node *n, *m; if (first_far_timer <= now) { bird_clock_t limit = now + NEAR_TIMER_LIMIT; first_far_timer = TIME_INFINITY; n = HEAD(far_timers); while (m = n->next) { t = SKIP_BACK(timer, n, n); if (t->expires <= limit) { rem_node(n); tm_insert_near(t); } else if (t->expires < first_far_timer) first_far_timer = t->expires; n = m; } } while ((n = HEAD(near_timers)) -> next) { int delay; t = SKIP_BACK(timer, n, n); if (t->expires > now) break; rem_node(n); delay = t->expires - now; t->expires = 0; if (t->recurrent) { int i = t->recurrent - delay; if (i < 0) i = 0; tm_start(t, i); } t->hook(t); } } bird_clock_t tm_parse_date(char *x) { struct tm tm; int n; time_t t; if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n]) return 0; tm.tm_mon--; tm.tm_year -= 1900; tm.tm_hour = tm.tm_min = tm.tm_sec = 0; t = mktime(&tm); if (t == (time_t) -1) return 0; return t; } void tm_format_date(char *x, bird_clock_t t) { struct tm *tm; tm = localtime(&t); sprintf(x, "%02d-%02d-%04d", tm->tm_mday, tm->tm_mon+1, tm->tm_year+1900); } /* * Sockets */ #ifndef SOL_IP #define SOL_IP IPPROTO_IP #endif static list sock_list; static void sk_free(resource *r) { sock *s = (sock *) r; if (s->fd >= 0) rem_node(&s->n); } static void sk_dump(resource *r) { sock *s = (sock *) r; static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC" }; debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n", sk_type_names[s->type], s->data, s->saddr, s->sport, s->daddr, s->dport, s->tos, s->ttl, s->iface ? s->iface->name : "none"); } static struct resclass sk_class = { "Socket", sizeof(sock), sk_free, sk_dump }; sock * sk_new(pool *p) { sock *s = ralloc(p, &sk_class); s->pool = p; s->data = NULL; s->saddr = s->daddr = IPA_NONE; s->sport = s->dport = 0; s->tos = s->ttl = -1; s->iface = NULL; s->rbuf = NULL; s->rx_hook = NULL; s->rbsize = 0; s->tbuf = NULL; s->tx_hook = NULL; s->tbsize = 0; s->err_hook = NULL; s->fd = -1; return s; } #define ERR(x) do { err = x; goto bad; } while(0) static inline void set_inaddr(struct in_addr *ia, ip_addr a) { a = ipa_hton(a); memcpy(&ia->s_addr, &a, sizeof(a)); } void fill_in_sockaddr(struct sockaddr_in *sa, ip_addr a, unsigned port) { sa->sin_family = AF_INET; sa->sin_port = htons(port); set_inaddr(&sa->sin_addr, a); } void get_sockaddr(struct sockaddr_in *sa, ip_addr *a, unsigned *port) { if (sa->sin_family != AF_INET) bug("get_sockaddr called for wrong address family"); if (port) *port = ntohs(sa->sin_port); memcpy(a, &sa->sin_addr.s_addr, sizeof(*a)); *a = ipa_ntoh(*a); } static char * sk_setup(sock *s) { int fd = s->fd; int one = 1; char *err; if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) ERR("fcntl(O_NONBLOCK)"); if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0) ERR("IP_TOS"); if (s->ttl >= 0) { if (setsockopt(fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0) ERR("IP_TTL"); if (setsockopt(fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) ERR("SO_DONTROUTE"); } #ifdef IP_PMTUDISC if (s->type != SK_TCP_PASSIVE && s->type != SK_TCP_ACTIVE && s->type != SK_MAGIC) { int dont = IP_PMTUDISC_DONT; if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0) ERR("IP_PMTUDISC"); } #endif /* FIXME: Set send/receive buffers? */ /* FIXME: Set keepalive for TCP connections? */ err = NULL; bad: return err; } static void sk_alloc_bufs(sock *s) { if (!s->rbuf && s->rbsize) s->rbuf = mb_alloc(s->pool, s->rbsize); s->rpos = s->rbuf; if (!s->tbuf && s->tbsize) s->tbuf = mb_alloc(s->pool, s->tbsize); s->tpos = s->ttx = s->tbuf; } void sk_tcp_connected(sock *s) { s->rx_hook(s, 0); s->type = SK_TCP; sk_alloc_bufs(s); } int sk_open(sock *s) { int fd, e; struct sockaddr_in sa; int zero = 0; int one = 1; int type = s->type; int has_src = ipa_nonzero(s->saddr) || s->sport; int has_dest = ipa_nonzero(s->daddr); char *err; switch (type) { case SK_TCP_ACTIVE: case SK_TCP_PASSIVE: fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); break; case SK_UDP: case SK_UDP_MC: fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); break; case SK_IP: case SK_IP_MC: fd = socket(PF_INET, SOCK_RAW, s->dport); break; case SK_MAGIC: fd = s->fd; break; default: bug("sk_open() called for invalid sock type %d", type); } if (fd < 0) die("sk_open: socket: %m"); s->fd = fd; if (err = sk_setup(s)) goto bad; switch (type) { case SK_UDP: case SK_IP: if (s->iface) /* It's a broadcast socket */ if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) < 0) ERR("SO_BROADCAST"); break; case SK_UDP_MC: case SK_IP_MC: { #ifdef HAVE_STRUCT_IP_MREQN struct ip_mreqn mreq; #define mreq_add mreq ASSERT(s->iface && s->iface->addr); mreq.imr_ifindex = s->iface->index; set_inaddr(&mreq.imr_address, s->iface->addr->ip); #else struct in_addr mreq; struct ip_mreq mreq_add; ASSERT(s->iface && s->iface->addr); set_inaddr(&mreq, s->iface->addr->ip); #ifdef SO_BINDTODEVICE { struct ifreq ifr; strcpy(ifr.ifr_name, s->iface->name); if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) ERR("SO_BINDTODEVICE"); mreq_add.imr_interface.s_addr = INADDR_ANY; } #else #error Multicasts not supported on PtP devices /* FIXME: Solve it somehow? */ mreq_add.imr_interface = mreq; #endif #endif set_inaddr(&mreq_add.imr_multiaddr, s->daddr); if (has_dest) { if ( #ifdef IP_DEFAULT_MULTICAST_TTL s->ttl != IP_DEFAULT_MULTICAST_TTL && #endif setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &s->ttl, sizeof(s->ttl)) < 0) ERR("IP_MULTICAST_TTL"); if ( #ifdef IP_DEFAULT_MULTICAST_LOOP IP_DEFAULT_MULTICAST_LOOP && #endif setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &zero, sizeof(zero)) < 0) ERR("IP_MULTICAST_LOOP"); /* This defines where should we send _outgoing_ multicasts */ if (setsockopt(fd, SOL_IP, IP_MULTICAST_IF, &mreq, sizeof(mreq)) < 0) ERR("IP_MULTICAST_IF"); } /* And this one sets interface for _receiving_ multicasts from */ if (has_src && setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP, &mreq_add, sizeof(mreq_add)) < 0) ERR("IP_ADD_MEMBERSHIP"); break; } } if (has_src) { int port; if (type == SK_IP || type == SK_IP_MC) port = 0; else { port = s->sport; if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0) ERR("SO_REUSEADDR"); } fill_in_sockaddr(&sa, s->saddr, port); if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) ERR("bind"); } fill_in_sockaddr(&sa, s->daddr, s->dport); switch (type) { case SK_TCP_ACTIVE: if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0) sk_tcp_connected(s); else if (errno != EINTR && errno != EAGAIN) ERR("connect"); break; case SK_TCP_PASSIVE: if (listen(fd, 8)) ERR("listen"); break; } sk_alloc_bufs(s); add_tail(&sock_list, &s->n); return 0; bad: log(L_ERR "sk_open: %s: %m", err); close(fd); s->fd = -1; return -1; } static int sk_maybe_write(sock *s) { int e; switch (s->type) { case SK_TCP: case SK_MAGIC: while (s->ttx != s->tpos) { e = write(s->fd, s->ttx, s->tpos - s->ttx); if (e < 0) { if (errno != EINTR && errno != EAGAIN) { log(L_ERR "write: %m"); s->err_hook(s, errno); return -1; } return 0; } s->ttx += e; } s->ttx = s->tpos = s->tbuf; return 1; case SK_UDP: case SK_UDP_MC: case SK_IP: case SK_IP_MC: { struct sockaddr_in sa; if (s->tbuf == s->tpos) return 1; fill_in_sockaddr(&sa, s->faddr, s->fport); e = sendto(s->fd, s->tbuf, s->tpos - s->tbuf, 0, (struct sockaddr *) &sa, sizeof(sa)); if (e < 0) { if (errno != EINTR && errno != EAGAIN) { log(L_ERR "sendto: %m"); s->err_hook(s, errno); return -1; } return 0; } s->tpos = s->tbuf; return 1; } default: bug("sk_maybe_write: unknown socket type %d", s->type); } } int sk_send(sock *s, unsigned len) { s->faddr = s->daddr; s->fport = s->dport; s->ttx = s->tbuf; s->tpos = s->tbuf + len; return sk_maybe_write(s); } int sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port) { s->faddr = addr; s->fport = port; s->ttx = s->tbuf; s->tpos = s->tbuf + len; return sk_maybe_write(s); } static int sk_read(sock *s) { switch (s->type) { case SK_TCP_ACTIVE: { struct sockaddr_in sa; fill_in_sockaddr(&sa, s->daddr, s->dport); if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0) sk_tcp_connected(s); else if (errno != EINTR && errno != EAGAIN) { log(L_ERR "connect: %m"); s->err_hook(s, errno); } return 0; } case SK_TCP_PASSIVE: { struct sockaddr_in sa; int al = sizeof(sa); int fd = accept(s->fd, (struct sockaddr *) &sa, &al); if (fd >= 0) { sock *t = sk_new(s->pool); char *err; t->type = SK_TCP; t->fd = fd; add_tail(&sock_list, &t->n); s->rx_hook(t, 0); if (err = sk_setup(t)) { log(L_ERR "Incoming connection: %s: %m", err); s->err_hook(s, errno); return 0; } sk_alloc_bufs(t); return 1; } else if (errno != EINTR && errno != EAGAIN) { log(L_ERR "accept: %m"); s->err_hook(s, errno); } return 0; } case SK_TCP: { int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos); if (c < 0) { if (errno != EINTR && errno != EAGAIN) { log(L_ERR "read: %m"); s->err_hook(s, errno); } } else if (!c) s->err_hook(s, 0); else { s->rpos += c; if (s->rx_hook(s, s->rpos - s->rbuf)) s->rpos = s->rbuf; return 1; } return 0; } case SK_MAGIC: return s->rx_hook(s, 0); default: { struct sockaddr_in sa; int al = sizeof(sa); int e = recvfrom(s->fd, s->rbuf, s->rbsize, 0, (struct sockaddr *) &sa, &al); if (e < 0) { if (errno != EINTR && errno != EAGAIN) { log(L_ERR "recvfrom: %m"); s->err_hook(s, errno); } return 0; } s->rpos = s->rbuf + e; get_sockaddr(&sa, &s->faddr, &s->fport); s->rx_hook(s, e); return 1; } } } static void sk_write(sock *s) { while (s->ttx != s->tbuf && sk_maybe_write(s) > 0) s->tx_hook(s); } void sk_dump_all(void) { node *n; sock *s; debug("Open sockets:\n"); WALK_LIST(n, sock_list) { s = SKIP_BACK(sock, n, n); debug("%p ", s); sk_dump(&s->r); } debug("\n"); } #undef ERR /* * Main I/O Loop */ volatile int async_config_flag; /* Asynchronous reconfiguration/dump scheduled */ volatile int async_dump_flag; void io_init(void) { init_list(&near_timers); init_list(&far_timers); init_list(&sock_list); init_list(&global_event_list); krt_io_init(); now = time(NULL); } void io_loop(void) { fd_set rd, wr; struct timeval timo; time_t tout; int hi; sock *s; node *n; /* FIXME: Use poll() if available */ FD_ZERO(&rd); FD_ZERO(&wr); for(;;) { ev_run_list(&global_event_list); now = time(NULL); tout = tm_first_shot(); if (tout <= now) { tm_shot(); continue; } else { timo.tv_sec = tout - now; timo.tv_usec = 0; } hi = 0; WALK_LIST(n, sock_list) { s = SKIP_BACK(sock, n, n); if (s->rx_hook) { FD_SET(s->fd, &rd); if (s->fd > hi) hi = s->fd; } if (s->tx_hook && s->ttx != s->tpos) { FD_SET(s->fd, &wr); if (s->fd > hi) hi = s->fd; } } /* * Yes, this is racy. But even if the signal comes before this test * and entering select(), it gets caught on the next timer tick. */ if (async_config_flag) { async_config(); async_config_flag = 0; continue; } if (async_dump_flag) { async_dump(); async_dump_flag = 0; continue; } if (async_shutdown_flag) { async_shutdown(); async_shutdown_flag = 0; continue; } /* And finally enter select() to find active sockets */ hi = select(hi+1, &rd, &wr, NULL, &timo); if (hi < 0) { if (errno == EINTR || errno == EAGAIN) continue; die("select: %m"); } if (hi) { WALK_LIST(n, sock_list) { s = SKIP_BACK(sock, n, n); if (FD_ISSET(s->fd, &rd)) { FD_CLR(s->fd, &rd); while (sk_read(s)) ; } if (FD_ISSET(s->fd, &wr)) { FD_CLR(s->fd, &wr); sk_write(s); } } } } }