diff options
Diffstat (limited to 'sysdep')
-rw-r--r-- | sysdep/autoconf.h.in | 4 | ||||
-rw-r--r-- | sysdep/bsd/Modules | 1 | ||||
-rw-r--r-- | sysdep/bsd/krt-sock.Y | 32 | ||||
-rw-r--r-- | sysdep/bsd/krt-sock.c | 510 | ||||
-rw-r--r-- | sysdep/bsd/krt-sys.h | 15 | ||||
-rw-r--r-- | sysdep/bsd/sysio.h | 94 | ||||
-rw-r--r-- | sysdep/cf/bsd-v6.h | 2 | ||||
-rw-r--r-- | sysdep/cf/bsd.h | 2 | ||||
-rw-r--r-- | sysdep/config.h | 3 | ||||
-rw-r--r-- | sysdep/linux/krt-sys.h | 4 | ||||
-rw-r--r-- | sysdep/linux/netlink.Y | 6 | ||||
-rw-r--r-- | sysdep/linux/netlink.c | 26 | ||||
-rw-r--r-- | sysdep/linux/sysio.h | 57 | ||||
-rw-r--r-- | sysdep/unix/config.Y | 33 | ||||
-rw-r--r-- | sysdep/unix/io.c | 94 | ||||
-rw-r--r-- | sysdep/unix/krt.c | 174 | ||||
-rw-r--r-- | sysdep/unix/krt.h | 16 | ||||
-rw-r--r-- | sysdep/unix/log.c | 103 | ||||
-rw-r--r-- | sysdep/unix/main.c | 198 | ||||
-rw-r--r-- | sysdep/unix/timer.h | 1 | ||||
-rw-r--r-- | sysdep/unix/unix.h | 7 |
21 files changed, 988 insertions, 394 deletions
diff --git a/sysdep/autoconf.h.in b/sysdep/autoconf.h.in index ac6f7a87..a9e46e27 100644 --- a/sysdep/autoconf.h.in +++ b/sysdep/autoconf.h.in @@ -39,10 +39,14 @@ #undef CONFIG_STATIC #undef CONFIG_RIP #undef CONFIG_RADV +#undef CONFIG_BFD #undef CONFIG_BGP #undef CONFIG_OSPF #undef CONFIG_PIPE +/* We use multithreading */ +#undef USE_PTHREADS + /* We have <syslog.h> and syslog() */ #undef HAVE_SYSLOG diff --git a/sysdep/bsd/Modules b/sysdep/bsd/Modules index 3729587d..96455db7 100644 --- a/sysdep/bsd/Modules +++ b/sysdep/bsd/Modules @@ -1,3 +1,4 @@ krt-sock.c +krt-sock.Y krt-sys.h sysio.h diff --git a/sysdep/bsd/krt-sock.Y b/sysdep/bsd/krt-sock.Y new file mode 100644 index 00000000..0218f188 --- /dev/null +++ b/sysdep/bsd/krt-sock.Y @@ -0,0 +1,32 @@ +/* + * BIRD -- BSD Kernel Syncer Configuration + * + * (c) 1999--2000 Martin Mares <mj@ucw.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +CF_HDR + +CF_DECLS + +CF_KEYWORDS(KERNEL, TABLE) + +CF_GRAMMAR + +CF_ADDTO(kern_proto, kern_proto kern_sys_item ';') + +kern_sys_item: + KERNEL TABLE expr { + if ($3 && (krt_max_tables == 1)) + cf_error("Multiple kernel routing tables not supported"); + if ($3 < 0 || $3 >= krt_max_tables) + cf_error("Kernel table id must be in range 0-%d", krt_max_tables - 1); + + THIS_KRT->sys.table_id = $3; + } + ; + +CF_CODE + +CF_END diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index 8d45cbfe..69a476d9 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -1,5 +1,5 @@ /* - * BIRD -- Unix Routing Table Syncing + * BIRD -- BSD Routing Table Syncing * * (c) 2004 Ondrej Filip <feela@network.cz> * @@ -7,6 +7,7 @@ */ #include <stdio.h> +#include <stdlib.h> #include <ctype.h> #include <fcntl.h> #include <unistd.h> @@ -34,18 +35,112 @@ #include "lib/socket.h" -#ifndef RTAX_MAX -#define RTAX_MAX 8 +/* + * There are significant differences in multiple tables support between BSD variants. 
+ * + * OpenBSD has table_id field for routes in route socket protocol, therefore all + * tables could be managed by one kernel socket. FreeBSD lacks such field, + * therefore multiple sockets (locked to specific table using SO_SETFIB socket + * option) must be used. + * + * Both FreeBSD and OpenBSD use separate scans for each table. In OpenBSD, + * table_id is specified explicitly as sysctl scan argument, while in FreeBSD it + * is handled implicitly by changing default table using setfib() syscall. + * + * KRT_SHARED_SOCKET - use shared kernel socket instead of one for each krt_proto + * KRT_USE_SETFIB_SCAN - use setfib() for sysctl() route scan + * KRT_USE_SETFIB_SOCK - use SO_SETFIB socket option for kernel sockets + * KRT_USE_SYSCTL_7 - use 7-th arg of sysctl() as table id for route scans + * KRT_USE_SYSCTL_NET_FIBS - use net.fibs sysctl() for dynamic max number of fibs + */ + +#ifdef __FreeBSD__ +#define KRT_MAX_TABLES 256 +#define KRT_USE_SETFIB_SCAN +#define KRT_USE_SETFIB_SOCK +#define KRT_USE_SYSCTL_NET_FIBS #endif -struct ks_msg +#ifdef __OpenBSD__ +#define KRT_MAX_TABLES (RT_TABLEID_MAX+1) +#define KRT_SHARED_SOCKET +#define KRT_USE_SYSCTL_7 +#endif + +#ifndef KRT_MAX_TABLES +#define KRT_MAX_TABLES 1 +#endif + + + +/* Dynamic max number of tables */ + +int krt_max_tables; + +#ifdef KRT_USE_SYSCTL_NET_FIBS + +static int +krt_get_max_tables(void) { - struct rt_msghdr rtm; - struct sockaddr_storage buf[RTAX_MAX]; -}; + int fibs; + size_t fibs_len = sizeof(fibs); + + if (sysctlbyname("net.fibs", &fibs, &fibs_len, NULL, 0) < 0) + { + log(L_WARN "KRT: unable to get max number of fib tables: %m"); + return 1; + } + + return MIN(fibs, KRT_MAX_TABLES); +} + +#else + +static int +krt_get_max_tables(void) +{ + return KRT_MAX_TABLES; +} + +#endif /* KRT_USE_SYSCTL_NET_FIBS */ + + +/* setfib() syscall for FreeBSD scans */ + +#ifdef KRT_USE_SETFIB_SCAN + +/* +static int krt_default_fib; + +static int +krt_get_active_fib(void) +{ + int fib; + size_t fib_len = 
sizeof(fib); + + if (sysctlbyname("net.my_fibnum", &fib, &fib_len, NULL, 0) < 0) + { + log(L_WARN "KRT: unable to get active fib number: %m"); + return 0; + } + + return fib; +} +*/ + +extern int setfib(int fib); + +#endif /* KRT_USE_SETFIB_SCAN */ + + +/* table_id -> krt_proto map */ + +#ifdef KRT_SHARED_SOCKET +static struct krt_proto *krt_table_map[KRT_MAX_TABLES]; +#endif -static int rt_sock = 0; +/* Route socket message processing */ int krt_capable(rte *e) @@ -65,6 +160,16 @@ krt_capable(rte *e) ); } +#ifndef RTAX_MAX +#define RTAX_MAX 8 +#endif + +struct ks_msg +{ + struct rt_msghdr rtm; + struct sockaddr_storage buf[RTAX_MAX]; +}; + #define ROUNDUP(a) \ ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) @@ -81,7 +186,7 @@ krt_capable(rte *e) body += l;} static int -krt_sock_send(int cmd, rte *e) +krt_send_route(struct krt_proto *p, int cmd, rte *e) { net *net = e->net; rta *a = e->attrs; @@ -103,13 +208,13 @@ krt_sock_send(int cmd, rte *e) msg.rtm.rtm_flags = RTF_UP | RTF_PROTO1; if (net->n.pxlen == MAX_PREFIX_LENGTH) - { msg.rtm.rtm_flags |= RTF_HOST; - } else - { msg.rtm.rtm_addrs |= RTA_NETMASK; - } + +#ifdef KRT_SHARED_SOCKET + msg.rtm.rtm_tableid = KRT_CF->sys.table_id; +#endif #ifdef RTF_REJECT if(a->dest == RTD_UNREACHABLE) @@ -192,7 +297,7 @@ krt_sock_send(int cmd, rte *e) l = body - (char *)&msg; msg.rtm.rtm_msglen = l; - if ((l = write(rt_sock, (char *)&msg, l)) < 0) { + if ((l = write(p->sys.sk->fd, (char *)&msg, l)) < 0) { log(L_ERR "KRT: Error sending route %I/%d to kernel: %m", net->n.prefix, net->n.pxlen); return -1; } @@ -201,16 +306,16 @@ krt_sock_send(int cmd, rte *e) } void -krt_replace_rte(struct krt_proto *p UNUSED, net *n, rte *new, rte *old, +krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list *eattrs UNUSED) { int err = 0; if (old) - krt_sock_send(RTM_DELETE, old); + krt_send_route(p, RTM_DELETE, old); if (new) - err = krt_sock_send(RTM_ADD, new); + err = krt_send_route(p, RTM_ADD, new); if 
(err < 0) n->n.flags |= KRF_SYNC_ERROR; @@ -221,8 +326,10 @@ krt_replace_rte(struct krt_proto *p UNUSED, net *n, rte *new, rte *old, #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0) static void -krt_read_rt(struct ks_msg *msg, struct krt_proto *p, int scan) +krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) { + /* p is NULL iff KRT_SHARED_SOCKET and !scan */ + rte *e; net *net; sockaddr dst, gate, mask; @@ -244,6 +351,17 @@ krt_read_rt(struct ks_msg *msg, struct krt_proto *p, int scan) if (flags & RTF_LLINFO) SKIP("link-local\n"); +#ifdef KRT_SHARED_SOCKET + if (!scan) + { + int table_id = msg->rtm.rtm_tableid; + p = (table_id < KRT_MAX_TABLES) ? krt_table_map[table_id] : NULL; + + if (!p) + SKIP("unknown table id %d\n", table_id); + } +#endif + GETADDR(&dst, RTA_DST); GETADDR(&gate, RTA_GATEWAY); GETADDR(&mask, RTA_NETMASK); @@ -536,17 +654,25 @@ krt_read_addr(struct ks_msg *msg) if ((masklen = ipa_mklen(imask)) < 0) { - log("Invalid masklen"); + log(L_ERR "KIF: Invalid masklen %I for %s", imask, iface->name); return; } - bzero(&ifa, sizeof(ifa)); +#ifdef IPV6 + /* Clean up embedded interface ID returned in link-local address */ + + if (ipa_has_link_scope(iaddr)) + _I0(iaddr) = 0xfe800000; + + if (ipa_has_link_scope(ibrd)) + _I0(ibrd) = 0xfe800000; +#endif - ifa.iface = iface; - memcpy(&ifa.ip, &iaddr, sizeof(ip_addr)); + bzero(&ifa, sizeof(ifa)); + ifa.iface = iface; + ifa.ip = iaddr; ifa.pxlen = masklen; - memcpy(&ifa.brd, &ibrd, sizeof(ip_addr)); scope = ipa_classify(ifa.ip); if (scope < 0) @@ -556,24 +682,10 @@ krt_read_addr(struct ks_msg *msg) } ifa.scope = scope & IADDR_SCOPE_MASK; -#ifdef IPV6 - /* Clean up embedded interface ID returned in link-local address */ - if (ipa_has_link_scope(ifa.ip)) - _I0(ifa.ip) = 0xfe800000; -#endif - -#ifdef IPV6 - /* Why not the same check also for IPv4? 
*/ - if ((iface->flags & IF_MULTIACCESS) || (masklen != BITS_PER_IP_ADDRESS)) -#else - if (iface->flags & IF_MULTIACCESS) -#endif + if (masklen < BITS_PER_IP_ADDRESS) { ifa.prefix = ipa_and(ifa.ip, ipa_mkmask(masklen)); - if (masklen == BITS_PER_IP_ADDRESS) - ifa.flags |= IA_HOST; - if (masklen == (BITS_PER_IP_ADDRESS - 1)) ifa.opposite = ipa_opposite_m1(ifa.ip); @@ -581,11 +693,22 @@ krt_read_addr(struct ks_msg *msg) if (masklen == (BITS_PER_IP_ADDRESS - 2)) ifa.opposite = ipa_opposite_m2(ifa.ip); #endif + + if (iface->flags & IF_BROADCAST) + ifa.brd = ibrd; + + if (!(iface->flags & IF_MULTIACCESS)) + ifa.opposite = ibrd; } - else /* PtP iface */ + else if (!(iface->flags & IF_MULTIACCESS) && ipa_nonzero(ibrd)) { + ifa.prefix = ifa.opposite = ibrd; ifa.flags |= IA_PEER; - ifa.prefix = ifa.opposite = ifa.brd; + } + else + { + ifa.prefix = ifa.ip; + ifa.flags |= IA_HOST; } if (new) @@ -594,17 +717,18 @@ krt_read_addr(struct ks_msg *msg) ifa_delete(&ifa); } - -void +static void krt_read_msg(struct proto *p, struct ks_msg *msg, int scan) { + /* p is NULL iff KRT_SHARED_SOCKET and !scan */ + switch (msg->rtm.rtm_type) { case RTM_GET: if(!scan) return; case RTM_ADD: case RTM_DELETE: - krt_read_rt(msg, (struct krt_proto *)p, scan); + krt_read_route(msg, (struct krt_proto *)p, scan); break; case RTM_IFANNOUNCE: krt_read_ifannounce(msg); @@ -621,14 +745,57 @@ krt_read_msg(struct proto *p, struct ks_msg *msg, int scan) } } + +/* Sysctl based scans */ + +static byte *krt_buffer; +static size_t krt_buflen, krt_bufmin; +static struct proto *krt_buffer_owner; + +static byte * +krt_buffer_update(struct proto *p, size_t *needed) +{ + size_t req = *needed; + + if ((req > krt_buflen) || + ((p == krt_buffer_owner) && (req < krt_bufmin))) + { + /* min buflen is 32 kB, step is 8 kB, or 128 kB if > 1 MB */ + size_t step = (req < 0x100000) ? 0x2000 : 0x20000; + krt_buflen = (req < 0x6000) ? 0x8000 : (req + step); + krt_bufmin = (req < 0x8000) ? 
0 : (req - 2*step); + + if (krt_buffer) + mb_free(krt_buffer); + krt_buffer = mb_alloc(krt_pool, krt_buflen); + krt_buffer_owner = p; + } + + *needed = krt_buflen; + return krt_buffer; +} + static void -krt_sysctl_scan(struct proto *p, pool *pool, byte **buf, size_t *bl, int cmd) +krt_buffer_release(struct proto *p) { - byte *next; - int mib[6]; - size_t obl, needed; + if (p == krt_buffer_owner) + { + mb_free(krt_buffer); + krt_buffer = NULL; + krt_buflen = 0; + krt_buffer_owner = 0; + } +} + +static void +krt_sysctl_scan(struct proto *p, int cmd, int table_id) +{ + byte *buf, *next; + int mib[7], mcnt; + size_t needed; struct ks_msg *m; int retries = 3; + int rv; mib[0] = CTL_NET; mib[1] = PF_ROUTE; @@ -636,125 +803,258 @@ krt_sysctl_scan(struct proto *p, pool *pool, byte **buf, size_t *bl, int cmd) mib[3] = BIRD_PF; mib[4] = cmd; mib[5] = 0; + mcnt = 6; - try: - if (sysctl(mib, 6 , NULL , &needed, NULL, 0) < 0) - die("krt_sysctl_scan 1: %m"); - - obl = *bl; +#ifdef KRT_USE_SYSCTL_7 + if (table_id >= 0) + { + mib[6] = table_id; + mcnt = 7; + } +#endif - while (needed > *bl) *bl *= 2; - while (needed < (*bl/2)) *bl /= 2; +#ifdef KRT_USE_SETFIB_SCAN + if (table_id > 0) + if (setfib(table_id) < 0) + { + log(L_ERR "KRT: setfib(%d) failed: %m", table_id); + return; + } +#endif - if ((obl!=*bl) || !*buf) + try: + rv = sysctl(mib, mcnt, NULL, &needed, NULL, 0); + if (rv < 0) { - if (*buf) mb_free(*buf); - if ((*buf = mb_alloc(pool, *bl)) == NULL) die("RT scan buf alloc"); + /* OpenBSD returns EINVAL for not yet used tables */ + if ((errno == EINVAL) && (table_id > 0)) + goto exit; + + log(L_ERR "KRT: Route scan estimate failed: %m"); + goto exit; } - if (sysctl(mib, 6 , *buf, &needed, NULL, 0) < 0) + /* The table is empty */ + if (needed == 0) + goto exit; + + buf = krt_buffer_update(p, &needed); + + rv = sysctl(mib, mcnt, buf, &needed, NULL, 0); + if (rv < 0) { - if (errno == ENOMEM) - { - /* The buffer size changed since last sysctl ('needed' is not changed) */ - if 
(retries--) - goto try; + /* The buffer size changed since last sysctl ('needed' is not changed) */ + if ((errno == ENOMEM) && retries--) + goto try; - log(L_ERR "KRT: Route scan failed"); - return; - } - die("krt_sysctl_scan 2: %m"); + log(L_ERR "KRT: Route scan failed: %m"); + goto exit; } - for (next = *buf; next < (*buf + needed); next += m->rtm.rtm_msglen) +#ifdef KRT_USE_SETFIB_SCAN + if (table_id > 0) + if (setfib(0) < 0) + die("KRT: setfib(%d) failed: %m", 0); +#endif + + /* Process received messages */ + for (next = buf; next < (buf + needed); next += m->rtm.rtm_msglen) { m = (struct ks_msg *)next; krt_read_msg(p, m, 1); } -} -static byte *krt_buffer = NULL; -static byte *kif_buffer = NULL; -static size_t krt_buflen = 32768; -static size_t kif_buflen = 4096; + return; + + exit: + krt_buffer_release(p); + +#ifdef KRT_USE_SETFIB_SCAN + if (table_id > 0) + if (setfib(0) < 0) + die("KRT: setfib(%d) failed: %m", 0); +#endif +} void krt_do_scan(struct krt_proto *p) { - krt_sysctl_scan((struct proto *)p, p->krt_pool, &krt_buffer, &krt_buflen, NET_RT_DUMP); + krt_sysctl_scan(&p->p, NET_RT_DUMP, KRT_CF->sys.table_id); } void kif_do_scan(struct kif_proto *p) { - struct proto *P = (struct proto *)p; if_start_update(); - krt_sysctl_scan(P, P->pool, &kif_buffer, &kif_buflen, NET_RT_IFLIST); + krt_sysctl_scan(&p->p, NET_RT_IFLIST, -1); if_end_update(); } + +/* Kernel sockets */ + static int krt_sock_hook(sock *sk, int size UNUSED) { struct ks_msg msg; int l = read(sk->fd, (char *)&msg, sizeof(msg)); - if(l <= 0) + if (l <= 0) log(L_ERR "krt-sock: read failed"); else - krt_read_msg((struct proto *)sk->data, &msg, 0); + krt_read_msg((struct proto *) sk->data, &msg, 0); return 0; } +static sock * +krt_sock_open(pool *pool, void *data, int table_id) +{ + sock *sk; + int fd; + + fd = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC); + if (fd < 0) + die("Cannot open kernel socket for routes"); + +#ifdef KRT_USE_SETFIB_SOCK + if (table_id > 0) + { + if (setsockopt(fd, SOL_SOCKET, 
SO_SETFIB, &table_id, sizeof(table_id)) < 0) + die("Cannot set FIB %d for kernel socket: %m", table_id); + } +#endif + + sk = sk_new(pool); + sk->type = SK_MAGIC; + sk->rx_hook = krt_sock_hook; + sk->fd = fd; + sk->data = data; + + if (sk_open(sk) < 0) + bug("krt-sock: sk_open failed"); + + return sk; +} + + +#ifdef KRT_SHARED_SOCKET + +static sock *krt_sock; +static int krt_sock_count; + + +static void +krt_sock_open_shared(void) +{ + if (!krt_sock_count) + krt_sock = krt_sock_open(krt_pool, NULL, -1); + + krt_sock_count++; +} + +static void +krt_sock_close_shared(void) +{ + krt_sock_count--; + + if (!krt_sock_count) + { + rfree(krt_sock); + krt_sock = NULL; + } +} + void -krt_sys_start(struct krt_proto *x, int first UNUSED) +krt_sys_start(struct krt_proto *p) { - sock *sk_rt; - static int ks_open_tried = 0; + krt_table_map[KRT_CF->sys.table_id] = p; - if (ks_open_tried) - return; + krt_sock_open_shared(); + p->sys.sk = krt_sock; +} - ks_open_tried = 1; +void +krt_sys_shutdown(struct krt_proto *p) +{ + krt_sock_close_shared(); + p->sys.sk = NULL; - DBG("KRT: Opening kernel socket\n"); + krt_table_map[KRT_CF->sys.table_id] = NULL; - if( (rt_sock = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC)) < 0) - die("Cannot open kernel socket for routes"); + krt_buffer_release(&p->p); +} - sk_rt = sk_new(krt_pool); - sk_rt->type = SK_MAGIC; - sk_rt->rx_hook = krt_sock_hook; - sk_rt->fd = rt_sock; - sk_rt->data = x; - if (sk_open(sk_rt)) - bug("krt-sock: sk_open failed"); +#else + +void +krt_sys_start(struct krt_proto *p) +{ + p->sys.sk = krt_sock_open(p->p.pool, p, KRT_CF->sys.table_id); } void -krt_sys_shutdown(struct krt_proto *x UNUSED, int last UNUSED) +krt_sys_shutdown(struct krt_proto *p) { - if (!krt_buffer) - return; + rfree(p->sys.sk); + p->sys.sk = NULL; - mb_free(krt_buffer); - krt_buffer = NULL; + krt_buffer_release(&p->p); } +#endif /* KRT_SHARED_SOCKET */ + + +/* KRT configuration callbacks */ + +static u32 krt_table_cf[(KRT_MAX_TABLES+31) / 32]; + +int 
+krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o) +{ + return n->sys.table_id == o->sys.table_id; +} void -kif_sys_start(struct kif_proto *p UNUSED) +krt_sys_preconfig(struct config *c UNUSED) { + krt_max_tables = krt_get_max_tables(); + bzero(&krt_table_cf, sizeof(krt_table_cf)); } void -kif_sys_shutdown(struct kif_proto *p UNUSED) +krt_sys_postconfig(struct krt_config *x) { - if (!kif_buffer) - return; + u32 *tbl = krt_table_cf; + int id = x->sys.table_id; + + if (tbl[id/32] & (1 << (id%32))) + cf_error("Multiple kernel syncers defined for table #%d", id); + + tbl[id/32] |= (1 << (id%32)); +} + +void krt_sys_init_config(struct krt_config *c) +{ + c->sys.table_id = 0; /* Default table */ +} - mb_free(kif_buffer); - kif_buffer = NULL; +void krt_sys_copy_config(struct krt_config *d, struct krt_config *s) +{ + d->sys.table_id = s->sys.table_id; +} + + +/* KIF misc code */ + +void +kif_sys_start(struct kif_proto *p UNUSED) +{ +} + +void +kif_sys_shutdown(struct kif_proto *p) +{ + krt_buffer_release(&p->p); } diff --git a/sysdep/bsd/krt-sys.h b/sysdep/bsd/krt-sys.h index 88915dde..9c0d4972 100644 --- a/sysdep/bsd/krt-sys.h +++ b/sysdep/bsd/krt-sys.h @@ -9,13 +9,14 @@ #ifndef _BIRD_KRT_SYS_H_ #define _BIRD_KRT_SYS_H_ +struct birdsock; /* Kernel interfaces */ struct kif_params { }; -struct kif_status { +struct kif_state { }; @@ -30,20 +31,18 @@ static inline void kif_sys_copy_config(struct kif_config *d UNUSED, struct kif_c /* Kernel routes */ +extern int krt_max_tables; + struct krt_params { + int table_id; /* Kernel table ID we sync with */ }; -struct krt_status { +struct krt_state { + struct birdsock *sk; }; static inline void krt_sys_init(struct krt_proto *p UNUSED) { } -static inline int krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n UNUSED, struct krt_config *o UNUSED) { return 1; } - -static inline void krt_sys_preconfig(struct config *c UNUSED) { } -static inline void krt_sys_postconfig(struct 
krt_config *c UNUSED) { } -static inline void krt_sys_init_config(struct krt_config *c UNUSED) { } -static inline void krt_sys_copy_config(struct krt_config *d UNUSED, struct krt_config *s UNUSED) { } #endif diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h index 4f91def5..cf049a0b 100644 --- a/sysdep/bsd/sysio.h +++ b/sysdep/bsd/sysio.h @@ -6,9 +6,22 @@ * Can be freely distributed and used under the terms of the GNU GPL. */ +#ifdef __NetBSD__ + +#ifndef IP_RECVTTL +#define IP_RECVTTL 23 +#endif + +#ifndef IP_MINTTL +#define IP_MINTTL 24 +#endif + +#endif + #ifdef __DragonFly__ #define TCP_MD5SIG TCP_SIGNATURE_ENABLE #endif + #ifdef IPV6 static inline void @@ -113,7 +126,9 @@ sysio_leave_group(sock *s, ip_addr maddr) /* BSD RX/TX packet info handling for IPv4 */ /* it uses IP_RECVDSTADDR / IP_RECVIF socket options instead of IP_PKTINFO */ -#define CMSG_RX_SPACE (CMSG_SPACE(sizeof(struct in_addr)) + CMSG_SPACE(sizeof(struct sockaddr_dl))) +#define CMSG_RX_SPACE (CMSG_SPACE(sizeof(struct in_addr)) + \ + CMSG_SPACE(sizeof(struct sockaddr_dl)) + \ + CMSG_SPACE(sizeof(char))) #define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in_addr)) static char * @@ -121,13 +136,18 @@ sysio_register_cmsgs(sock *s) { int ok = 1; if (s->flags & SKF_LADDR_RX) - { - if (setsockopt(s->fd, IPPROTO_IP, IP_RECVDSTADDR, &ok, sizeof(ok)) < 0) - return "IP_RECVDSTADDR"; + { + if (setsockopt(s->fd, IPPROTO_IP, IP_RECVDSTADDR, &ok, sizeof(ok)) < 0) + return "IP_RECVDSTADDR"; + + if (setsockopt(s->fd, IPPROTO_IP, IP_RECVIF, &ok, sizeof(ok)) < 0) + return "IP_RECVIF"; + } + + if ((s->flags & SKF_TTL_RX) && + (setsockopt(s->fd, IPPROTO_IP, IP_RECVTTL, &ok, sizeof(ok)) < 0)) + return "IP_RECVTTL"; - if (setsockopt(s->fd, IPPROTO_IP, IP_RECVIF, &ok, sizeof(ok)) < 0) - return "IP_RECVIF"; - } return NULL; } @@ -136,27 +156,35 @@ static void sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) { struct cmsghdr *cm; + struct in_addr *ra = NULL; + struct sockaddr_dl *ri = NULL; + unsigned char *ttl = NULL; 
- if (!(s->flags & SKF_LADDR_RX)) - return; + for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) + { + if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVDSTADDR) + ra = (struct in_addr *) CMSG_DATA(cm); - s->laddr = IPA_NONE; - s->lifindex = 0; + if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVIF) + ri = (struct sockaddr_dl *) CMSG_DATA(cm); - for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) - { - if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVDSTADDR) - { - struct in_addr *ra = (struct in_addr *) CMSG_DATA(cm); - get_inaddr(&s->laddr, ra); - } + if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVTTL) + ttl = (unsigned char *) CMSG_DATA(cm); + } - if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVIF) - { - struct sockaddr_dl *ri = (struct sockaddr_dl *) CMSG_DATA(cm); - s->lifindex = ri->sdl_index; - } - } + if (s->flags & SKF_LADDR_RX) + { + s->laddr = IPA_NONE; + s->lifindex = 0; + + if (ra) + get_inaddr(&s->laddr, ra); + if (ri) + s->lifindex = ri->sdl_index; + } + + if (s->flags & SKF_TTL_RX) + s->ttl = ttl ? 
*ttl : -1; // log(L_WARN "RX %I %d", s->laddr, s->lifindex); } @@ -244,8 +272,6 @@ sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd) #ifndef IPV6 -#ifdef IP_MINTTL - static int sk_set_min_ttl4(sock *s, int ttl) { @@ -262,25 +288,23 @@ sk_set_min_ttl4(sock *s, int ttl) return 0; } -#else /* no IP_MINTTL */ +#else /* IPv6 */ static int -sk_set_min_ttl4(sock *s, int ttl) +sk_set_min_ttl6(sock *s, int ttl) { - log(L_ERR "IPv4 TTL security not supported"); + log(L_ERR "IPv6 TTL security not supported"); return -1; } #endif -#else /* IPv6 */ + +int sk_priority_control = -1; static int -sk_set_min_ttl6(sock *s, int ttl) +sk_set_priority(sock *s, int prio UNUSED) { - log(L_ERR "IPv6 TTL security not supported"); + log(L_WARN "Socket priority not supported"); return -1; } - -#endif - diff --git a/sysdep/cf/bsd-v6.h b/sysdep/cf/bsd-v6.h index b7f25f64..47a7c7ff 100644 --- a/sysdep/cf/bsd-v6.h +++ b/sysdep/cf/bsd-v6.h @@ -10,8 +10,10 @@ #define CONFIG_AUTO_ROUTES #define CONFIG_SELF_CONSCIOUS +#define CONFIG_MULTIPLE_TABLES #define CONFIG_SKIP_MC_BIND +#define CONFIG_NO_IFACE_BIND /* Link: sysdep/unix diff --git a/sysdep/cf/bsd.h b/sysdep/cf/bsd.h index e7cc135f..5e6d03e8 100644 --- a/sysdep/cf/bsd.h +++ b/sysdep/cf/bsd.h @@ -8,8 +8,10 @@ #define CONFIG_AUTO_ROUTES #define CONFIG_SELF_CONSCIOUS +#define CONFIG_MULTIPLE_TABLES #define CONFIG_SKIP_MC_BIND +#define CONFIG_NO_IFACE_BIND /* Link: sysdep/unix diff --git a/sysdep/config.h b/sysdep/config.h index 7106e4ba..914c1090 100644 --- a/sysdep/config.h +++ b/sysdep/config.h @@ -7,7 +7,7 @@ #define _BIRD_CONFIG_H_ /* BIRD version */ -#define BIRD_VERSION "1.3.8" +#define BIRD_VERSION "1.3.12" /* Include parameters determined by configure script */ #include "sysdep/autoconf.h" @@ -34,6 +34,7 @@ typedef INTEGER_64 s64; typedef unsigned INTEGER_64 u64; typedef u8 byte; typedef u16 word; +typedef unsigned int uint; #endif diff --git a/sysdep/linux/krt-sys.h b/sysdep/linux/krt-sys.h index cdee7fe3..7b3043a7 100644 --- 
a/sysdep/linux/krt-sys.h +++ b/sysdep/linux/krt-sys.h @@ -15,7 +15,7 @@ struct kif_params { }; -struct kif_status { +struct kif_state { }; @@ -36,7 +36,7 @@ struct krt_params { int table_id; /* Kernel table ID we sync with */ }; -struct krt_status { +struct krt_state { }; diff --git a/sysdep/linux/netlink.Y b/sysdep/linux/netlink.Y index 51689ff9..b0e35151 100644 --- a/sysdep/linux/netlink.Y +++ b/sysdep/linux/netlink.Y @@ -10,13 +10,13 @@ CF_HDR CF_DECLS -CF_KEYWORDS(ASYNC, KERNEL, TABLE, KRT_PREFSRC, KRT_REALM) +CF_KEYWORDS(KERNEL, TABLE, KRT_PREFSRC, KRT_REALM) CF_GRAMMAR -CF_ADDTO(kern_proto, kern_proto nl_item ';') +CF_ADDTO(kern_proto, kern_proto kern_sys_item ';') -nl_item: +kern_sys_item: KERNEL TABLE expr { if ($3 <= 0 || $3 >= NL_NUM_TABLES) cf_error("Kernel routing table number out of range"); diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 791f715e..90443ed6 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -7,6 +7,7 @@ */ #include <stdio.h> +#include <unistd.h> #include <fcntl.h> #include <sys/socket.h> #include <sys/uio.h> @@ -843,9 +844,11 @@ nl_parse_route(struct nlmsghdr *h, int scan) memcpy(&ra.gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra.gw)); ipa_ntoh(ra.gw); +#ifdef IPV6 /* Silently skip strange 6to4 routes */ if (ipa_in_net(ra.gw, IPA_NONE, 96)) return; +#endif ng = neigh_find2(&p->p, &ra.gw, ra.iface, (i->rtm_flags & RTNH_F_ONLINK) ? 
NEF_ONLINK : 0); @@ -1038,11 +1041,9 @@ nl_open_async(void) sock *sk; struct sockaddr_nl sa; int fd; - static int nl_open_tried = 0; - if (nl_open_tried) + if (nl_async_sk) return; - nl_open_tried = 1; DBG("KRT: Opening async netlink socket\n"); @@ -1063,18 +1064,18 @@ nl_open_async(void) if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) { log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m"); + close(fd); return; } + nl_async_rx_buffer = xmalloc(NL_RX_SIZE); + sk = nl_async_sk = sk_new(krt_pool); sk->type = SK_MAGIC; sk->rx_hook = nl_async_hook; sk->fd = fd; if (sk_open(sk)) bug("Netlink: sk_open failed"); - - if (!nl_async_rx_buffer) - nl_async_rx_buffer = xmalloc(NL_RX_SIZE); } /* @@ -1084,19 +1085,18 @@ nl_open_async(void) static u8 nl_cf_table[(NL_NUM_TABLES+7) / 8]; void -krt_sys_start(struct krt_proto *p, int first) +krt_sys_start(struct krt_proto *p) { nl_table_map[KRT_CF->sys.table_id] = p; - if (first) - { - nl_open(); - nl_open_async(); - } + + nl_open(); + nl_open_async(); } void -krt_sys_shutdown(struct krt_proto *p UNUSED, int last UNUSED) +krt_sys_shutdown(struct krt_proto *p UNUSED) { + nl_table_map[KRT_CF->sys.table_id] = NULL; } int diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h index 90b3ebd9..250ed586 100644 --- a/sysdep/linux/sysio.h +++ b/sysdep/linux/sysio.h @@ -194,17 +194,22 @@ sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd) /* RX/TX packet info handling for IPv4 */ /* Mostly similar to standardized IPv6 code */ -#define CMSG_RX_SPACE CMSG_SPACE(sizeof(struct in_pktinfo)) +#define CMSG_RX_SPACE (CMSG_SPACE(sizeof(struct in_pktinfo)) + CMSG_SPACE(sizeof(int))) #define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in_pktinfo)) static char * sysio_register_cmsgs(sock *s) { int ok = 1; + if ((s->flags & SKF_LADDR_RX) && - setsockopt(s->fd, IPPROTO_IP, IP_PKTINFO, &ok, sizeof(ok)) < 0) + (setsockopt(s->fd, IPPROTO_IP, IP_PKTINFO, &ok, sizeof(ok)) < 0)) return "IP_PKTINFO"; + if ((s->flags & SKF_TTL_RX) && + 
(setsockopt(s->fd, IPPROTO_IP, IP_RECVTTL, &ok, sizeof(ok)) < 0)) + return "IP_RECVTTL"; + return NULL; } @@ -213,25 +218,34 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) { struct cmsghdr *cm; struct in_pktinfo *pi = NULL; - - if (!(s->flags & SKF_LADDR_RX)) - return; + int *ttl = NULL; for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) + { + if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_PKTINFO) + pi = (struct in_pktinfo *) CMSG_DATA(cm); + + if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_TTL) + ttl = (int *) CMSG_DATA(cm); + } + + if (s->flags & SKF_LADDR_RX) + { + if (pi) { - if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_PKTINFO) - pi = (struct in_pktinfo *) CMSG_DATA(cm); + get_inaddr(&s->laddr, &pi->ipi_addr); + s->lifindex = pi->ipi_ifindex; } - - if (!pi) + else { s->laddr = IPA_NONE; s->lifindex = 0; - return; } + } + + if (s->flags & SKF_TTL_RX) + s->ttl = ttl ? *ttl : -1; - get_inaddr(&s->laddr, &pi->ipi_addr); - s->lifindex = pi->ipi_ifindex; return; } @@ -310,3 +324,22 @@ sk_set_min_ttl6(sock *s, int ttl) } #endif + + +#ifndef IPV6_TCLASS +#define IPV6_TCLASS 67 +#endif + +int sk_priority_control = 7; + +static int +sk_set_priority(sock *s, int prio) +{ + if (setsockopt(s->fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)) < 0) + { + log(L_WARN "sk_set_priority: setsockopt: %m"); + return -1; + } + + return 0; +} diff --git a/sysdep/unix/config.Y b/sysdep/unix/config.Y index 844f53df..7bade918 100644 --- a/sysdep/unix/config.Y +++ b/sysdep/unix/config.Y @@ -14,9 +14,9 @@ CF_HDR CF_DECLS CF_KEYWORDS(LOG, SYSLOG, ALL, DEBUG, TRACE, INFO, REMOTE, WARNING, ERROR, AUTH, FATAL, BUG, STDERR, SOFT) -CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, BASE, NAME) +CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, BASE, NAME, CONFIRM, UNDO, CHECK, TIMEOUT) -%type <i> log_mask log_mask_list log_cat +%type <i> log_mask log_mask_list log_cat cfg_timeout %type <g> log_file %type <t> cfg_name %type <tf> timeformat_which @@ -104,13 
+104,26 @@ timeformat_base: /* Unix specific commands */ -CF_CLI_HELP(CONFIGURE, [soft] [\"<file>\"], [[Reload configuration]]) +CF_CLI_HELP(CONFIGURE, ..., [[Reload configuration]]) -CF_CLI(CONFIGURE, cfg_name, [\"<file>\"], [[Reload configuration]]) -{ cmd_reconfig($2, RECONFIG_HARD); } ; +CF_CLI(CONFIGURE, cfg_name cfg_timeout, [\"<file>\"] [timeout [<sec>]], [[Reload configuration]]) +{ cmd_reconfig($2, RECONFIG_HARD, $3); } ; -CF_CLI(CONFIGURE SOFT, cfg_name, [\"<file>\"], [[Reload configuration and ignore changes in filters]]) -{ cmd_reconfig($3, RECONFIG_SOFT); } ; +CF_CLI(CONFIGURE SOFT, cfg_name cfg_timeout, [\"<file>\"] [timeout [<sec>]], [[Reload configuration and ignore changes in filters]]) +{ cmd_reconfig($3, RECONFIG_SOFT, $4); } ; + +/* Hack to get input completion for 'timeout' */ +CF_CLI_CMD(CONFIGURE TIMEOUT, [<sec>], [[Reload configuration with undo timeout]]) +CF_CLI_CMD(CONFIGURE SOFT TIMEOUT, [<sec>], [[Reload configuration with undo timeout]]) + +CF_CLI(CONFIGURE CONFIRM,,, [[Confirm last configuration change - deactivate undo timeout]]) +{ cmd_reconfig_confirm(); } ; + +CF_CLI(CONFIGURE UNDO,,, [[Undo last configuration change]]) +{ cmd_reconfig_undo(); } ; + +CF_CLI(CONFIGURE CHECK, cfg_name, [\"<file>\"], [[Parse configuration and check its validity]]) +{ cmd_check_config($3); } ; CF_CLI(DOWN,,, [[Shut the daemon down]]) { cmd_shutdown(); } ; @@ -120,6 +133,12 @@ cfg_name: | TEXT ; +cfg_timeout: + /* empty */ { $$ = 0; } + | TIMEOUT { $$ = UNIX_DEFAULT_CONFIGURE_TIMEOUT; } + | TIMEOUT expr { $$ = $2; } + ; + CF_CODE CF_END diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index f91b5278..6e3f1e4d 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -17,10 +17,10 @@ #include <sys/time.h> #include <sys/types.h> #include <sys/socket.h> -#include <sys/fcntl.h> #include <sys/uio.h> #include <sys/un.h> #include <unistd.h> +#include <fcntl.h> #include <errno.h> #include <netinet/in.h> #include <netinet/icmp6.h> @@ -121,7 +121,7 @@ static list 
near_timers, far_timers; static bird_clock_t first_far_timer = TIME_INFINITY; /* now must be different from 0, because 0 is a special value in timer->expires */ -bird_clock_t now = 1, now_real; +bird_clock_t now = 1, now_real, boot_time; static void update_times_plain(void) @@ -538,6 +538,11 @@ sk_free(resource *r) if (s->fd >= 0) { close(s->fd); + + /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ + if (s->flags & SKF_THREAD) + return; + if (s == current_sock) current_sock = sk_next(s); if (s == stored_sock) @@ -598,7 +603,7 @@ sock_new(pool *p) sock *s = ralloc(p, &sk_class); s->pool = p; // s->saddr = s->daddr = IPA_NONE; - s->tos = s->ttl = -1; + s->tos = s->priority = s->ttl = -1; s->fd = -1; return s; } @@ -673,7 +678,7 @@ get_sockaddr(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, unsigned *p #ifdef IPV6 /* PKTINFO handling is also standardized in IPv6 */ -#define CMSG_RX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo)) +#define CMSG_RX_SPACE (CMSG_SPACE(sizeof(struct in6_pktinfo)) + CMSG_SPACE(sizeof(int))) #define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo)) /* @@ -685,15 +690,26 @@ get_sockaddr(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, unsigned *p #ifndef IPV6_RECVPKTINFO #define IPV6_RECVPKTINFO IPV6_PKTINFO #endif +/* + * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT. 
+ */ +#ifndef IPV6_RECVHOPLIMIT +#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT +#endif static char * sysio_register_cmsgs(sock *s) { int ok = 1; + if ((s->flags & SKF_LADDR_RX) && - setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0) + (setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)) return "IPV6_RECVPKTINFO"; + if ((s->flags & SKF_TTL_RX) && + (setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &ok, sizeof(ok)) < 0)) + return "IPV6_RECVHOPLIMIT"; + return NULL; } @@ -702,25 +718,34 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) { struct cmsghdr *cm; struct in6_pktinfo *pi = NULL; - - if (!(s->flags & SKF_LADDR_RX)) - return; + int *hlim = NULL; for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) + { + if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO) + pi = (struct in6_pktinfo *) CMSG_DATA(cm); + + if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_HOPLIMIT) + hlim = (int *) CMSG_DATA(cm); + } + + if (s->flags & SKF_LADDR_RX) + { + if (pi) { - if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO) - pi = (struct in6_pktinfo *) CMSG_DATA(cm); + get_inaddr(&s->laddr, &pi->ipi6_addr); + s->lifindex = pi->ipi6_ifindex; } - - if (!pi) + else { s->laddr = IPA_NONE; s->lifindex = 0; - return; } + } + + if (s->flags & SKF_TTL_RX) + s->ttl = hlim ? 
*hlim : -1; - get_inaddr(&s->laddr, &pi->ipi6_addr); - s->lifindex = pi->ipi6_ifindex; return; } @@ -783,21 +808,28 @@ sk_setup(sock *s) ERR("fcntl(O_NONBLOCK)"); if (s->type == SK_UNIX) return NULL; -#ifndef IPV6 + +#ifdef IPV6 + if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0) + WARN("IPV6_TCLASS"); +#else if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0) WARN("IP_TOS"); #endif + if (s->priority >= 0) + sk_set_priority(s, s->priority); + #ifdef IPV6 int v = 1; if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0) WARN("IPV6_V6ONLY"); #endif - if (s->ttl >= 0) - err = sk_set_ttl_int(s); + if ((s->ttl >= 0) && (err = sk_set_ttl_int(s))) + goto bad; - sysio_register_cmsgs(s); + err = sysio_register_cmsgs(s); bad: return err; } @@ -1154,6 +1186,15 @@ sk_open(sock *s) port = s->sport; if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0) ERR("SO_REUSEADDR"); + +#ifdef CONFIG_NO_IFACE_BIND + /* Workaround missing ability to bind to an iface */ + if ((type == SK_UDP) && s->iface && ipa_zero(s->saddr)) + { + if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0) + ERR("SO_REUSEPORT"); + } +#endif } fill_in_sockaddr(&sa, s->saddr, s->iface, port); if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) @@ -1204,7 +1245,8 @@ sk_open(sock *s) #endif } - sk_insert(s); + if (!(s->flags & SKF_THREAD)) + sk_insert(s); return 0; bad: @@ -1392,7 +1434,9 @@ sk_send_full(sock *s, unsigned len, struct iface *ifa, } */ -static int + /* sk_read() and sk_write() are called from BFD's event loop */ + +int sk_read(sock *s) { switch (s->type) @@ -1469,7 +1513,7 @@ sk_read(sock *s) } } -static int +int sk_write(sock *s) { switch (s->type) @@ -1487,7 +1531,8 @@ sk_write(sock *s) default: if (s->ttx != s->tpos && sk_maybe_write(s) > 0) { - s->tx_hook(s); + if (s->tx_hook) + s->tx_hook(s); return 1; } return 0; @@ -1530,6 +1575,7 @@ io_init(void) 
krt_io_init(); init_times(); update_times(); + boot_time = now; srandom((int) now_real); } @@ -1557,7 +1603,7 @@ io_loop(void) tm_shot(); continue; } - timo.tv_sec = events ? 0 : tout - now; + timo.tv_sec = events ? 0 : MIN(tout - now, 3); timo.tv_usec = 0; if (sock_recalc_fdsets_p) diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 497d328d..3f9e1479 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -69,12 +69,14 @@ pool *krt_pool; static linpool *krt_filter_lp; +static list krt_proto_list; void krt_io_init(void) { krt_pool = rp_new(&root_pool, "Kernel Syncer"); krt_filter_lp = lp_new(krt_pool, 4080); + init_list(&krt_proto_list); } /* @@ -114,12 +116,18 @@ kif_request_scan(void) } static inline int -prefer_scope(struct ifa *a, struct ifa *b) -{ return (a->scope > SCOPE_LINK) && (b->scope <= SCOPE_LINK); } - -static inline int prefer_addr(struct ifa *a, struct ifa *b) -{ return ipa_compare(a->ip, b->ip) < 0; } +{ + int sa = a->scope > SCOPE_LINK; + int sb = b->scope > SCOPE_LINK; + + if (sa < sb) + return 0; + else if (sa > sb) + return 1; + else + return ipa_compare(a->ip, b->ip) < 0; +} static inline struct ifa * find_preferred_ifa(struct iface *i, ip_addr prefix, ip_addr mask) @@ -130,7 +138,7 @@ find_preferred_ifa(struct iface *i, ip_addr prefix, ip_addr mask) { if (!(a->flags & IA_SECONDARY) && ipa_equal(ipa_and(a->ip, mask), prefix) && - (!b || prefer_scope(a, b) || prefer_addr(a, b))) + (!b || prefer_addr(a, b))) b = a; } @@ -558,12 +566,6 @@ krt_dump_attrs(rte *e) * Routes */ -#ifdef CONFIG_ALL_TABLES_AT_ONCE -static timer *krt_scan_timer; -static int krt_instance_count; -static list krt_instance_list; -#endif - static void krt_flush_routes(struct krt_proto *p) { @@ -574,7 +576,7 @@ krt_flush_routes(struct krt_proto *p) { net *n = (net *) f; rte *e = n->routes; - if (e && (n->n.flags & KRF_INSTALLED)) + if (rte_is_valid(e) && (n->n.flags & KRF_INSTALLED)) { /* FIXME: this does not work if gw is changed in export filter */ krt_replace_rte(p, 
e->net, NULL, e, NULL); @@ -649,7 +651,7 @@ krt_got_route(struct krt_proto *p, rte *e) } old = net->routes; - if ((net->n.flags & KRF_INSTALLED) && old) + if ((net->n.flags & KRF_INSTALLED) && rte_is_valid(old)) { /* There may be changes in route attributes, we ignore that. Also, this does not work well if gw is changed in export filter */ @@ -727,6 +729,13 @@ krt_prune(struct krt_proto *p) /* Route rejected, should not happen (KRF_INSTALLED) but to be sure .. */ verdict = (verdict == KRF_CREATE) ? KRF_IGNORE : KRF_DELETE; } + else + { + ea_list **x = &tmpa; + while (*x) + x = &((*x)->next); + *x = new ? new->attrs->eattrs : NULL; + } } switch (verdict) @@ -805,34 +814,87 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new) * Periodic scanning */ + +#ifdef CONFIG_ALL_TABLES_AT_ONCE + +static timer *krt_scan_timer; +static int krt_scan_count; + static void krt_scan(timer *t UNUSED) { struct krt_proto *p; kif_force_scan(); -#ifdef CONFIG_ALL_TABLES_AT_ONCE + + /* We need some node to decide whether to print the debug messages or not */ + p = SKIP_BACK(struct krt_proto, krt_node, HEAD(krt_proto_list)); + KRT_TRACE(p, D_EVENTS, "Scanning routing table"); + + krt_do_scan(NULL); + + void *q; + WALK_LIST(q, krt_proto_list) { - void *q; - /* We need some node to decide whether to print the debug messages or not */ - p = SKIP_BACK(struct krt_proto, instance_node, HEAD(krt_instance_list)); - if (p->instance_node.next) - KRT_TRACE(p, D_EVENTS, "Scanning routing table"); - krt_do_scan(NULL); - WALK_LIST(q, krt_instance_list) - { - p = SKIP_BACK(struct krt_proto, instance_node, q); - krt_prune(p); - } + p = SKIP_BACK(struct krt_proto, krt_node, q); + krt_prune(p); + } +} + +static void +krt_scan_timer_start(struct krt_proto *p) +{ + if (!krt_scan_count) + krt_scan_timer = tm_new_set(krt_pool, krt_scan, NULL, 0, KRT_CF->scan_time); + + krt_scan_count++; + + tm_start(krt_scan_timer, 0); +} + +static void +krt_scan_timer_stop(struct krt_proto *p) +{ + krt_scan_count--; + + 
if (!krt_scan_count) + { + rfree(krt_scan_timer); + krt_scan_timer = NULL; } +} + #else - p = t->data; + +static void +krt_scan(timer *t) +{ + struct krt_proto *p = t->data; + + kif_force_scan(); + KRT_TRACE(p, D_EVENTS, "Scanning routing table"); krt_do_scan(p); krt_prune(p); -#endif } +static void +krt_scan_timer_start(struct krt_proto *p) +{ + p->scan_timer = tm_new_set(p->p.pool, krt_scan, p, 0, KRT_CF->scan_time); + tm_start(p->scan_timer, 0); +} + +static void +krt_scan_timer_stop(struct krt_proto *p) +{ + tm_stop(p->scan_timer); +} + +#endif + + + /* * Updates @@ -893,7 +955,7 @@ krt_notify(struct proto *P, struct rtable *table UNUSED, net *net, { struct krt_proto *p = (struct krt_proto *) P; - if (shutting_down) + if (config->shutdown) return; if (!(net->n.flags & KRF_INSTALLED)) old = NULL; @@ -935,52 +997,20 @@ krt_init(struct proto_config *c) return &p->p; } -static timer * -krt_start_timer(struct krt_proto *p) -{ - timer *t; - - t = tm_new(p->krt_pool); - t->hook = krt_scan; - t->data = p; - t->recurrent = KRT_CF->scan_time; - tm_start(t, 0); - return t; -} - static int krt_start(struct proto *P) { struct krt_proto *p = (struct krt_proto *) P; - int first = 1; -#ifdef CONFIG_ALL_TABLES_AT_ONCE - if (!krt_instance_count++) - init_list(&krt_instance_list); - else - first = 0; - p->krt_pool = krt_pool; - add_tail(&krt_instance_list, &p->instance_node); -#else - p->krt_pool = P->pool; -#endif + add_tail(&krt_proto_list, &p->krt_node); #ifdef KRT_ALLOW_LEARN krt_learn_init(p); #endif - krt_sys_start(p, first); + krt_sys_start(p); - /* Start periodic routing table scanning */ -#ifdef CONFIG_ALL_TABLES_AT_ONCE - if (first) - krt_scan_timer = krt_start_timer(p); - else - tm_start(krt_scan_timer, 0); - p->scan_timer = krt_scan_timer; -#else - p->scan_timer = krt_start_timer(p); -#endif + krt_scan_timer_start(p); return PS_UP; } @@ -989,26 +1019,16 @@ static int krt_shutdown(struct proto *P) { struct krt_proto *p = (struct krt_proto *) P; - int last = 1; -#ifdef 
CONFIG_ALL_TABLES_AT_ONCE - rem_node(&p->instance_node); - if (--krt_instance_count) - last = 0; - else -#endif - tm_stop(p->scan_timer); + krt_scan_timer_stop(p); /* FIXME we should flush routes even when persist during reconfiguration */ if (p->initialized && !KRT_CF->persist) krt_flush_routes(p); - krt_sys_shutdown(p, last); + krt_sys_shutdown(p); -#ifdef CONFIG_ALL_TABLES_AT_ONCE - if (last) - rfree(krt_scan_timer); -#endif + rem_node(&p->krt_node); return PS_DOWN; } diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index d6fbf721..446914d2 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -52,15 +52,17 @@ struct krt_config { struct krt_proto { struct proto p; - struct krt_status sys; /* Sysdep state */ + struct krt_state sys; /* Sysdep state */ + #ifdef KRT_ALLOW_LEARN struct rtable krt_table; /* Internal table of inherited routes */ #endif - pool *krt_pool; /* Pool used for common krt data */ + +#ifndef CONFIG_ALL_TABLES_AT_ONCE timer *scan_timer; -#ifdef CONFIG_ALL_TABLES_AT_ONCE - node instance_node; /* Node in krt instance list */ #endif + + node krt_node; /* Node in krt_proto_list */ int initialized; /* First scan has already been finished */ }; @@ -103,7 +105,7 @@ struct kif_config { struct kif_proto { struct proto p; - struct kif_status sys; /* Sysdep state */ + struct kif_state sys; /* Sysdep state */ }; #define KIF_CF ((struct kif_config *)p->p.cf) @@ -114,8 +116,8 @@ struct proto_config * krt_init_config(int class); /* krt sysdep */ void krt_sys_init(struct krt_proto *); -void krt_sys_start(struct krt_proto *, int); -void krt_sys_shutdown(struct krt_proto *, int); +void krt_sys_start(struct krt_proto *); +void krt_sys_shutdown(struct krt_proto *); int krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o); void krt_sys_preconfig(struct config *); diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c index 92f12f1e..0f4c06e9 100644 --- a/sysdep/unix/log.c +++ b/sysdep/unix/log.c @@ -32,8 +32,24 @@ static FILE 
*dbgf; static list *current_log_list; static char *current_syslog_name; /* NULL -> syslog closed */ -bird_clock_t rate_limit_time = 5; -int rate_limit_count = 5; +static const bird_clock_t rate_limit_time = 5; +static const int rate_limit_count = 5; + + +#ifdef USE_PTHREADS + +#include <pthread.h> +static pthread_mutex_t log_mutex; +static inline void log_lock(void) { pthread_mutex_lock(&log_mutex); } +static inline void log_unlock(void) { pthread_mutex_unlock(&log_mutex); } + +#else + +static inline void log_lock(void) { } +static inline void log_unlock(void) { } + +#endif + #ifdef HAVE_SYSLOG #include <sys/syslog.h> @@ -65,26 +81,6 @@ static char *class_names[] = { "BUG" }; -#define LOG_BUFFER_SIZE 1024 -static char log_buffer[LOG_BUFFER_SIZE]; -static char *log_buffer_pos; -static int log_buffer_remains; - - -/** - * log_reset - reset the log buffer - * - * This function resets a log buffer and discards buffered - * messages. Should be used before a log message is prepared - * using logn(). - */ -void -log_reset(void) -{ - log_buffer_pos = log_buffer; - log_buffer_remains = LOG_BUFFER_SIZE; - log_buffer[0] = 0; -} /** * log_commit - commit a log message @@ -99,10 +95,14 @@ log_reset(void) * in log(), so it should be written like *L_INFO. */ void -log_commit(int class) +log_commit(int class, buffer *buf) { struct log_config *l; + if (buf->pos == buf->end) + strcpy(buf->end - 100, " ... 
<too long>"); + + log_lock(); WALK_LIST(l, *current_log_list) { if (!(l->mask & (1 << class))) @@ -117,47 +117,32 @@ log_commit(int class) tm_format_datetime(tbuf, &config->tf_log, now); fprintf(l->fh, "%s <%s> ", tbuf, class_names[class]); } - fputs(log_buffer, l->fh); + fputs(buf->start, l->fh); fputc('\n', l->fh); fflush(l->fh); } #ifdef HAVE_SYSLOG else - syslog(syslog_priorities[class], "%s", log_buffer); + syslog(syslog_priorities[class], "%s", buf->start); #endif } - cli_echo(class, log_buffer); - - log_reset(); -} + log_unlock(); -static void -log_print(const char *msg, va_list args) -{ - int i; - - if (log_buffer_remains == 0) - return; - - i=bvsnprintf(log_buffer_pos, log_buffer_remains, msg, args); - if (i < 0) - { - bsprintf(log_buffer + LOG_BUFFER_SIZE - 100, " ... <too long>"); - log_buffer_remains = 0; - return; - } + /* FIXME: cli_echo is not thread-safe */ + cli_echo(class, buf->start); - log_buffer_pos += i; - log_buffer_remains -= i; + buf->pos = buf->start; } +int buffer_vprint(buffer *buf, const char *fmt, va_list args); static void vlog(int class, const char *msg, va_list args) { - log_reset(); - log_print(msg, args); - log_commit(class); + buffer buf; + LOG_BUFFER_INIT(buf); + buffer_vprint(&buf, msg, args); + log_commit(class, &buf); } @@ -186,26 +171,6 @@ log_msg(char *msg, ...) va_end(args); } -/** - * logn - prepare a partial message in the log buffer - * @msg: printf-like formatting string (without message class information) - * - * This function formats a message according to the format string @msg - * and adds it to the log buffer. Messages in the log buffer are - * logged when the buffer is flushed using log_commit() function. The - * message should not contain |\n|, log_commit() also terminates a - * line. - */ -void -logn(char *msg, ...) -{ - va_list args; - - va_start(args, msg); - log_print(msg, args); - va_end(args); -} - void log_rl(struct rate_limit *rl, char *msg, ...) 
{ diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index f0344a8f..7a945826 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -97,9 +97,10 @@ static inline void add_num_const(char *name, int val) { struct symbol *s = cf_find_symbol(name); - s->class = SYM_NUMBER; - s->def = NULL; - s->aux = val; + s->class = SYM_CONSTANT | T_INT; + s->def = cfg_allocz(sizeof(struct f_val)); + SYM_TYPE(s) = T_INT; + SYM_VAL(s).i = val; } /* the code of read_iproute_table() is based on @@ -198,7 +199,7 @@ unix_read_config(struct config **cp, char *name) return ret; } -static void +static struct config * read_config(void) { struct config *conf; @@ -210,7 +211,8 @@ read_config(void) else die("Unable to open configuration file %s: %m", config_name); } - config_commit(conf, RECONFIG_HARD); + + return conf; } void @@ -228,19 +230,17 @@ async_config(void) config_free(conf); } else - config_commit(conf, RECONFIG_HARD); + config_commit(conf, RECONFIG_HARD, 0); } -void -cmd_reconfig(char *name, int type) +static struct config * +cmd_read_config(char *name) { struct config *conf; - if (cli_access_restricted()) - return; - if (!name) name = config_name; + cli_msg(-2, "Reading configuration from %s", name); if (!unix_read_config(&conf, name)) { @@ -249,24 +249,94 @@ cmd_reconfig(char *name, int type) else cli_msg(8002, "%s: %m", name); config_free(conf); + conf = NULL; } - else + + return conf; +} + +void +cmd_check_config(char *name) +{ + struct config *conf = cmd_read_config(name); + if (!conf) + return; + + cli_msg(20, "Configuration OK"); + config_free(conf); +} + +static void +cmd_reconfig_msg(int r) +{ + switch (r) { - switch (config_commit(conf, type)) - { - case CONF_DONE: - cli_msg(3, "Reconfigured."); - break; - case CONF_PROGRESS: - cli_msg(4, "Reconfiguration in progress."); - break; - case CONF_SHUTDOWN: - cli_msg(6, "Reconfiguration ignored, shutting down."); - break; - default: - cli_msg(5, "Reconfiguration already in progress, queueing new config"); - } + case 
CONF_DONE: cli_msg( 3, "Reconfigured"); break; + case CONF_PROGRESS: cli_msg( 4, "Reconfiguration in progress"); break; + case CONF_QUEUED: cli_msg( 5, "Reconfiguration already in progress, queueing new config"); break; + case CONF_UNQUEUED: cli_msg(17, "Reconfiguration already in progress, removing queued config"); break; + case CONF_CONFIRM: cli_msg(18, "Reconfiguration confirmed"); break; + case CONF_SHUTDOWN: cli_msg( 6, "Reconfiguration ignored, shutting down"); break; + case CONF_NOTHING: cli_msg(19, "Nothing to do"); break; + default: break; + } +} + +/* Hack for scheduled undo notification */ +cli *cmd_reconfig_stored_cli; + +void +cmd_reconfig_undo_notify(void) +{ + if (cmd_reconfig_stored_cli) + { + cli *c = cmd_reconfig_stored_cli; + cli_printf(c, CLI_ASYNC_CODE, "Config timeout expired, starting undo"); + cli_write_trigger(c); + } +} + +void +cmd_reconfig(char *name, int type, int timeout) +{ + if (cli_access_restricted()) + return; + + struct config *conf = cmd_read_config(name); + if (!conf) + return; + + int r = config_commit(conf, type, timeout); + + if ((r >= 0) && (timeout > 0)) + { + cmd_reconfig_stored_cli = this_cli; + cli_msg(-22, "Undo scheduled in %d s", timeout); } + + cmd_reconfig_msg(r); +} + +void +cmd_reconfig_confirm(void) +{ + if (cli_access_restricted()) + return; + + int r = config_confirm(); + cmd_reconfig_msg(r); +} + +void +cmd_reconfig_undo(void) +{ + if (cli_access_restricted()) + return; + + cli_msg(-21, "Undo requested"); + + int r = config_undo(); + cmd_reconfig_msg(r); } /* @@ -404,6 +474,58 @@ cli_init_unix(uid_t use_uid, gid_t use_gid) } /* + * PID file + */ + +static char *pid_file; +static int pid_fd; + +static inline void +open_pid_file(void) +{ + if (!pid_file) + return; + + pid_fd = open(pid_file, O_WRONLY|O_CREAT, 0664); + if (pid_fd < 0) + die("Cannot create PID file %s: %m", pid_file); +} + +static inline void +write_pid_file(void) +{ + int pl, rv; + char ps[24]; + + if (!pid_file) + return; + + /* We don't use 
PID file for uniqueness, so no need for locking */ + + pl = bsnprintf(ps, sizeof(ps), "%ld\n", (long) getpid()); + if (pl < 0) + bug("PID buffer too small"); + + rv = ftruncate(pid_fd, 0); + if (rv < 0) + die("fruncate: %m"); + + rv = write(pid_fd, ps, pl); + if(rv < 0) + die("write: %m"); + + close(pid_fd); +} + +static inline void +unlink_pid_file(void) +{ + if (pid_file) + unlink(pid_file); +} + + +/* * Shutdown */ @@ -427,6 +549,7 @@ async_shutdown(void) void sysdep_shutdown_done(void) { + unlink_pid_file(); unlink(path_control_socket); log_msg(L_FATAL "Shutdown completed"); exit(0); @@ -479,16 +602,17 @@ signal_init(void) * Parsing of command-line arguments */ -static char *opt_list = "c:dD:ps:u:g:"; +static char *opt_list = "c:dD:ps:P:u:g:f"; static int parse_and_exit; char *bird_name; static char *use_user; static char *use_group; +static int run_in_foreground = 0; static void usage(void) { - fprintf(stderr, "Usage: %s [-c <config-file>] [-d] [-D <debug-file>] [-p] [-s <control-socket>] [-u <user>] [-g <group>]\n", bird_name); + fprintf(stderr, "Usage: %s [-c <config-file>] [-d] [-D <debug-file>] [-p] [-s <control-socket>] [-P <pid-file>] [-u <user>] [-g <group>] [-f]\n", bird_name); exit(1); } @@ -587,12 +711,18 @@ parse_args(int argc, char **argv) case 's': path_control_socket = optarg; break; + case 'P': + pid_file = optarg; + break; case 'u': use_user = optarg; break; case 'g': use_group = optarg; break; + case 'f': + run_in_foreground = 1; + break; default: usage(); } @@ -623,6 +753,7 @@ main(int argc, char **argv) rt_init(); if_init(); roa_init(); + config_init(); uid_t use_uid = get_uid(use_user); gid_t use_gid = get_gid(use_group); @@ -639,16 +770,19 @@ main(int argc, char **argv) if (use_uid) drop_uid(use_uid); + if (!parse_and_exit) + open_pid_file(); + protos_build(); proto_build(&proto_unix_kernel); proto_build(&proto_unix_iface); - read_config(); + struct config *conf = read_config(); if (parse_and_exit) exit(0); - if (!debug_flag) + if 
(!(debug_flag||run_in_foreground)) { pid_t pid = fork(); if (pid < 0) @@ -663,8 +797,12 @@ main(int argc, char **argv) dup2(0, 2); } + write_pid_file(); + signal_init(); + config_commit(conf, RECONFIG_HARD, 0); + #ifdef LOCAL_DEBUG async_dump_flag = 1; #endif diff --git a/sysdep/unix/timer.h b/sysdep/unix/timer.h index a788ae27..17450322 100644 --- a/sysdep/unix/timer.h +++ b/sysdep/unix/timer.h @@ -32,6 +32,7 @@ void tm_dump_all(void); extern bird_clock_t now; /* Relative, monotonic time in seconds */ extern bird_clock_t now_real; /* Time in seconds since fixed known epoch */ +extern bird_clock_t boot_time; static inline bird_clock_t tm_remains(timer *t) diff --git a/sysdep/unix/unix.h b/sysdep/unix/unix.h index 3e85c85c..1fc26db2 100644 --- a/sysdep/unix/unix.h +++ b/sysdep/unix/unix.h @@ -19,9 +19,14 @@ extern char *bird_name; void async_config(void); void async_dump(void); void async_shutdown(void); -void cmd_reconfig(char *name, int type); +void cmd_check_config(char *name); +void cmd_reconfig(char *name, int type, int timeout); +void cmd_reconfig_confirm(void); +void cmd_reconfig_undo(void); void cmd_shutdown(void); +#define UNIX_DEFAULT_CONFIGURE_TIMEOUT 300 + /* io.c */ volatile int async_config_flag; |