diff options
Diffstat (limited to 'sysdep')
-rw-r--r-- | sysdep/bsd/Makefile | 6 | ||||
-rw-r--r-- | sysdep/bsd/Modules | 5 | ||||
-rw-r--r-- | sysdep/bsd/krt-sock.Y | 4 | ||||
-rw-r--r-- | sysdep/bsd/krt-sock.c | 338 | ||||
-rw-r--r-- | sysdep/bsd/krt-sys.h | 5 | ||||
-rw-r--r-- | sysdep/bsd/setkey.h | 14 | ||||
-rw-r--r-- | sysdep/bsd/sysio.h | 6 | ||||
-rw-r--r-- | sysdep/cf/bsd-v6.h | 22 | ||||
-rw-r--r-- | sysdep/cf/bsd.h | 3 | ||||
-rw-r--r-- | sysdep/cf/linux-v6.h | 21 | ||||
-rw-r--r-- | sysdep/cf/linux.h | 9 | ||||
-rw-r--r-- | sysdep/config.h | 9 | ||||
-rw-r--r-- | sysdep/linux/Makefile | 6 | ||||
-rw-r--r-- | sysdep/linux/Modules | 5 | ||||
-rw-r--r-- | sysdep/linux/netlink.c | 895 | ||||
-rw-r--r-- | sysdep/unix/Makefile | 8 | ||||
-rw-r--r-- | sysdep/unix/Modules | 12 | ||||
-rw-r--r-- | sysdep/unix/config.Y | 27 | ||||
-rw-r--r-- | sysdep/unix/io.c | 861 | ||||
-rw-r--r-- | sysdep/unix/krt.Y | 66 | ||||
-rw-r--r-- | sysdep/unix/krt.c | 301 | ||||
-rw-r--r-- | sysdep/unix/krt.h | 33 | ||||
-rw-r--r-- | sysdep/unix/log.c | 11 | ||||
-rw-r--r-- | sysdep/unix/main.c | 15 | ||||
-rw-r--r-- | sysdep/unix/timer.h | 88 | ||||
-rw-r--r-- | sysdep/unix/unix.h | 44 |
26 files changed, 1574 insertions, 1240 deletions
diff --git a/sysdep/bsd/Makefile b/sysdep/bsd/Makefile new file mode 100644 index 00000000..dfa32747 --- /dev/null +++ b/sysdep/bsd/Makefile @@ -0,0 +1,6 @@ +src := krt-sock.c +obj := $(src-o-files) +$(all-daemon) +$(conf-y-targets): $(s)krt-sock.Y + +tests_objs := $(tests_objs) $(src-o-files) diff --git a/sysdep/bsd/Modules b/sysdep/bsd/Modules deleted file mode 100644 index 39db88e9..00000000 --- a/sysdep/bsd/Modules +++ /dev/null @@ -1,5 +0,0 @@ -krt-sock.c -krt-sock.Y -krt-sys.h -sysio.h -setkey.h diff --git a/sysdep/bsd/krt-sock.Y b/sysdep/bsd/krt-sock.Y index 0218f188..81422c79 100644 --- a/sysdep/bsd/krt-sock.Y +++ b/sysdep/bsd/krt-sock.Y @@ -20,8 +20,8 @@ kern_sys_item: KERNEL TABLE expr { if ($3 && (krt_max_tables == 1)) cf_error("Multiple kernel routing tables not supported"); - if ($3 < 0 || $3 >= krt_max_tables) - cf_error("Kernel table id must be in range 0-%d", krt_max_tables - 1); + if ($3 >= krt_max_tables) + cf_error("Kernel table id must be in range 0-%u", krt_max_tables - 1); THIS_KRT->sys.table_id = $3; } diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index f0cebd11..e56dd616 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -28,12 +28,12 @@ #include "nest/route.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "lib/timer.h" -#include "lib/unix.h" -#include "lib/krt.h" +#include "sysdep/unix/unix.h" +#include "sysdep/unix/krt.h" #include "lib/string.h" #include "lib/socket.h" +const int rt_default_ecmp = 0; /* * There are significant differences in multiple tables support between BSD variants. @@ -72,14 +72,13 @@ #endif - /* Dynamic max number of tables */ -int krt_max_tables; +uint krt_max_tables; #ifdef KRT_USE_SYSCTL_NET_FIBS -static int +static uint krt_get_max_tables(void) { int fibs; @@ -91,7 +90,11 @@ krt_get_max_tables(void) return 1; } - return MIN(fibs, KRT_MAX_TABLES); + /* Should not happen */ + if (fibs < 1) + return 1; + + return (uint) MIN(fibs, KRT_MAX_TABLES); } #else @@ -136,7 +139,7 @@ extern int setfib(int fib); /* table_id -> krt_proto map */ #ifdef KRT_SHARED_SOCKET -static struct krt_proto *krt_table_map[KRT_MAX_TABLES]; +static struct krt_proto *krt_table_map[KRT_MAX_TABLES][2]; #endif @@ -148,9 +151,7 @@ krt_capable(rte *e) rta *a = e->attrs; return - a->cast == RTC_UNICAST && - (a->dest == RTD_ROUTER - || a->dest == RTD_DEVICE + ((a->dest == RTD_UNICAST && !a->nh.next) /* No multipath support */ #ifdef RTF_REJECT || a->dest == RTD_UNREACHABLE #endif @@ -185,18 +186,27 @@ struct ks_msg memcpy(p, body, (l > sizeof(*p) ? sizeof(*p) : l));\ body += l;} +static inline void +sockaddr_fill_dl(struct sockaddr_dl *sa, struct iface *ifa) +{ + uint len = OFFSETOF(struct sockaddr_dl, sdl_data); + memset(sa, 0, len); + sa->sdl_len = len; + sa->sdl_family = AF_LINK; + sa->sdl_index = ifa->index; +} + static int krt_send_route(struct krt_proto *p, int cmd, rte *e) { net *net = e->net; rta *a = e->attrs; static int msg_seq; - struct iface *j, *i = a->iface; + struct iface *j, *i = a->nh.iface; int l; struct ks_msg msg; char *body = (char *)msg.buf; sockaddr gate, mask, dst; - ip_addr gw; DBG("krt-sock: send %I/%d via %I\n", net->n.prefix, net->n.pxlen, a->gw); @@ -207,7 +217,8 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) msg.rtm.rtm_addrs = RTA_DST; msg.rtm.rtm_flags = RTF_UP | RTF_PROTO1; - if (net->n.pxlen == MAX_PREFIX_LENGTH) + /* XXXX */ + if (net_pxlen(net->n.addr) == net_max_prefix_length[net->n.addr->type]) msg.rtm.rtm_flags |= RTF_HOST; else msg.rtm.rtm_addrs |= RTA_NETMASK; @@ -225,14 +236,12 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) msg.rtm.rtm_flags |= RTF_BLACKHOLE; #endif - /* This is really very nasty, but I'm not able - * to add "(reject|blackhole)" route without - * gateway set + /* + * This is really very nasty, but I'm not able to add reject/blackhole route + * without gateway address. */ - if(!i) + if (!i) { - i = HEAD(iface_list); - WALK_LIST(j, iface_list) { if (j->flags & IF_LOOPBACK) @@ -241,52 +250,83 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) break; } } + + if (!i) + { + log(L_ERR "KRT: Cannot find loopback iface"); + return -1; + } } - gw = a->gw; + int af = AF_UNSPEC; -#ifdef IPV6 - /* Embed interface ID to link-local address */ - if (ipa_is_link_local(gw)) - _I0(gw) = 0xfe800000 | (i->index & 0x0000ffff); -#endif + switch (net->n.addr->type) { + case NET_IP4: + af = AF_INET; + break; + case NET_IP6: + af = AF_INET6; + break; + default: + log(L_ERR "KRT: Not sending route %N to kernel", net->n.addr); + return -1; + } - sockaddr_fill(&dst, BIRD_AF, net->n.prefix, NULL, 0); - sockaddr_fill(&mask, BIRD_AF, ipa_mkmask(net->n.pxlen), NULL, 0); - sockaddr_fill(&gate, BIRD_AF, gw, NULL, 0); + sockaddr_fill(&dst, af, net_prefix(net->n.addr), NULL, 0); + sockaddr_fill(&mask, af, net_pxmask(net->n.addr), NULL, 0); switch (a->dest) { - case RTD_ROUTER: + case RTD_UNICAST: + if (ipa_nonzero(a->nh.gw)) + { + ip_addr gw = a->nh.gw; + + /* Embed interface ID to link-local address */ + if (ipa_is_link_local(gw)) + _I0(gw) = 0xfe800000 | (i->index & 0x0000ffff); + + sockaddr_fill(&gate, af, gw, NULL, 0); msg.rtm.rtm_flags |= RTF_GATEWAY; msg.rtm.rtm_addrs |= RTA_GATEWAY; break; + } #ifdef RTF_REJECT - case RTD_UNREACHABLE: + case RTD_UNREACHABLE: #endif #ifdef RTF_BLACKHOLE - case RTD_BLACKHOLE: + case RTD_BLACKHOLE: #endif - case RTD_DEVICE: - if(i) - { -#ifdef RTF_CLONING - if (cmd == RTM_ADD && (i->flags & IF_MULTIACCESS) != IF_MULTIACCESS) /* PTP */ - msg.rtm.rtm_flags |= RTF_CLONING; + { + /* Fallback for all other valid cases */ + +#if __OpenBSD__ + /* Keeping temporarily old code for OpenBSD */ + struct ifa *addr = (net->n.addr->type == NET_IP4) ? i->addr4 : (i->addr6 ?: i->llv6); + + if (!addr) + { + log(L_ERR "KRT: interface %s has no IP addess", i->name); + return -1; + } + + /* Embed interface ID to link-local address */ + ip_addr gw = addr->ip; + if (ipa_is_link_local(gw)) + _I0(gw) = 0xfe800000 | (i->index & 0x0000ffff); + + sockaddr_fill(&gate, af, gw, i, 0); +#else + sockaddr_fill_dl(&gate, i); #endif - if(!i->addr) { - log(L_ERR "KRT: interface %s has no IP addess", i->name); - return -1; - } + msg.rtm.rtm_addrs |= RTA_GATEWAY; + break; + } - sockaddr_fill(&gate, BIRD_AF, i->addr->ip, NULL, 0); - msg.rtm.rtm_addrs |= RTA_GATEWAY; - } - break; - default: - bug("krt-sock: unknown flags, but not filtered"); + default: + bug("krt-sock: unknown flags, but not filtered"); } msg.rtm.rtm_index = i->index; @@ -299,7 +339,7 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) msg.rtm.rtm_msglen = l; if ((l = write(p->sys.sk->fd, (char *)&msg, l)) < 0) { - log(L_ERR "KRT: Error sending route %I/%d to kernel: %m", net->n.prefix, net->n.pxlen); + log(L_ERR "KRT: Error sending route %N to kernel: %m", net->n.addr); return -1; } @@ -331,10 +371,12 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) { /* p is NULL iff KRT_SHARED_SOCKET and !scan */ + int ipv6; rte *e; net *net; sockaddr dst, gate, mask; ip_addr idst, igate, imask; + net_addr ndst; void *body = (char *)msg->buf; int new = (msg->rtm.rtm_type != RTM_DELETE); char *errmsg = "KRT: Invalid route received"; @@ -352,42 +394,64 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) if (flags & RTF_LLINFO) SKIP("link-local\n"); -#ifdef KRT_SHARED_SOCKET - if (!scan) - { - int table_id = msg->rtm.rtm_tableid; - p = (table_id < KRT_MAX_TABLES) ? krt_table_map[table_id] : NULL; - - if (!p) - SKIP("unknown table id %d\n", table_id); - } -#endif - GETADDR(&dst, RTA_DST); GETADDR(&gate, RTA_GATEWAY); GETADDR(&mask, RTA_NETMASK); - if (dst.sa.sa_family != BIRD_AF) - SKIP("invalid DST"); + switch (dst.sa.sa_family) { + case AF_INET: + ipv6 = 0; + break; + case AF_INET6: + ipv6 = 1; + break; + default: + SKIP("invalid DST"); + } + + /* We do not test family for RTA_NETMASK, because BSD sends us + some strange values, but interpreting them as IPv4/IPv6 works */ + mask.sa.sa_family = dst.sa.sa_family; idst = ipa_from_sa(&dst); imask = ipa_from_sa(&mask); - igate = (gate.sa.sa_family == BIRD_AF) ? ipa_from_sa(&gate) : IPA_NONE; + igate = (gate.sa.sa_family == dst.sa.sa_family) ? ipa_from_sa(&gate) : IPA_NONE; - /* We do not test family for RTA_NETMASK, because BSD sends us - some strange values, but interpreting them as IPv4/IPv6 works */ +#ifdef KRT_SHARED_SOCKET + if (!scan) + { + int table_id = msg->rtm.rtm_tableid; + p = (table_id < KRT_MAX_TABLES) ? krt_table_map[table_id][ipv6] : NULL; + if (!p) + SKIP("unknown table id %d\n", table_id); + } +#endif + if ((!ipv6) && (p->p.main_channel->table->addr_type != NET_IP4)) + SKIP("reading only IPv4 routes"); + if ( ipv6 && (p->p.main_channel->table->addr_type != NET_IP6)) + SKIP("reading only IPv6 routes"); int c = ipa_classify_net(idst); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) SKIP("strange class/scope\n"); - int pxlen = (flags & RTF_HOST) ? MAX_PREFIX_LENGTH : ipa_masklen(imask); + int pxlen; + if (ipv6) + pxlen = (flags & RTF_HOST) ? IP6_MAX_PREFIX_LENGTH : ip6_masklen(&ipa_to_ip6(imask)); + else + pxlen = (flags & RTF_HOST) ? IP4_MAX_PREFIX_LENGTH : ip4_masklen(ipa_to_ip4(imask)); + if (pxlen < 0) { log(L_ERR "%s (%I) - netmask %I", errmsg, idst, imask); return; } + if (ipv6) + net_fill_ip6(&ndst, ipa_to_ip6(idst), pxlen); + else + net_fill_ip4(&ndst, ipa_to_ip4(idst), pxlen); + if ((flags & RTF_GATEWAY) && ipa_zero(igate)) - { log(L_ERR "%s (%I/%d) - missing gateway", errmsg, idst, pxlen); return; } + { log(L_ERR "%s (%N) - missing gateway", errmsg, ndst); return; } u32 self_mask = RTF_PROTO1; u32 alien_mask = RTF_STATIC | RTF_PROTO1 | RTF_GATEWAY; @@ -426,13 +490,12 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) else src = KRT_SRC_KERNEL; - net = net_get(p->p.table, idst, pxlen); + net = net_get(p->p.main_channel->table, &ndst); rta a = { .src = p->p.main_source, .source = RTS_INHERIT, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST }; /* reject/blackhole routes have also set RTF_GATEWAY, @@ -452,41 +515,37 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) } #endif - a.iface = if_find_by_index(msg->rtm.rtm_index); - if (!a.iface) + a.nh.iface = if_find_by_index(msg->rtm.rtm_index); + if (!a.nh.iface) { - log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u", - net->n.prefix, net->n.pxlen, msg->rtm.rtm_index); + log(L_ERR "KRT: Received route %N with unknown ifindex %u", + net->n.addr, msg->rtm.rtm_index); return; } + a.dest = RTD_UNICAST; if (flags & RTF_GATEWAY) { neighbor *ng; - a.dest = RTD_ROUTER; - a.gw = igate; + a.nh.gw = igate; -#ifdef IPV6 /* Clean up embedded interface ID returned in link-local address */ - if (ipa_is_link_local(a.gw)) - _I0(a.gw) = 0xfe800000; -#endif + if (ipa_is_link_local(a.nh.gw)) + _I0(a.nh.gw) = 0xfe800000; - ng = neigh_find2(&p->p, &a.gw, a.iface, 0); + ng = neigh_find2(&p->p, &a.nh.gw, a.nh.iface, 0); if (!ng || (ng->scope == SCOPE_HOST)) { /* Ignore routes with next-hop 127.0.0.1, host routes with such next-hop appear on OpenBSD for address aliases. */ - if (ipa_classify(a.gw) == (IADDR_HOST | SCOPE_HOST)) + if (ipa_classify(a.nh.gw) == (IADDR_HOST | SCOPE_HOST)) return; - log(L_ERR "KRT: Received route %I/%d with strange next-hop %I", - net->n.prefix, net->n.pxlen, a.gw); + log(L_ERR "KRT: Received route %N with strange next-hop %I", + net->n.addr, a.nh.gw); return; } } - else - a.dest = RTD_DEVICE; done: e = rte_get_temp(&a); @@ -643,22 +702,28 @@ krt_read_addr(struct ks_msg *msg, int scan) GETADDR (&null, RTA_AUTHOR); GETADDR (&brd, RTA_BRD); - /* Some other family address */ - if (addr.sa.sa_family != BIRD_AF) - return; + /* Is addr family IP4 or IP6? */ + int ipv6; + switch (addr.sa.sa_family) { + case AF_INET: ipv6 = 0; break; + case AF_INET6: ipv6 = 1; break; + default: return; + } + + /* We do not test family for RTA_NETMASK, because BSD sends us + some strange values, but interpreting them as IPv4/IPv6 works */ + mask.sa.sa_family = addr.sa.sa_family; iaddr = ipa_from_sa(&addr); imask = ipa_from_sa(&mask); ibrd = ipa_from_sa(&brd); - - if ((masklen = ipa_masklen(imask)) < 0) + if ((ipv6 ? (masklen = ip6_masklen(&ipa_to_ip6(imask))) : (masklen = ip4_masklen(ipa_to_ip4(imask)))) < 0) { - log(L_ERR "KIF: Invalid masklen %I for %s", imask, iface->name); + log(L_ERR "KIF: Invalid mask %I for %s", imask, iface->name); return; } -#ifdef IPV6 /* Clean up embedded interface ID returned in link-local address */ if (ipa_is_link_local(iaddr)) @@ -666,13 +731,11 @@ krt_read_addr(struct ks_msg *msg, int scan) if (ipa_is_link_local(ibrd)) _I0(ibrd) = 0xfe800000; -#endif bzero(&ifa, sizeof(ifa)); ifa.iface = iface; ifa.ip = iaddr; - ifa.pxlen = masklen; scope = ipa_classify(ifa.ip); if (scope < 0) @@ -682,17 +745,16 @@ krt_read_addr(struct ks_msg *msg, int scan) } ifa.scope = scope & IADDR_SCOPE_MASK; - if (masklen < BITS_PER_IP_ADDRESS) + if (masklen < (ipv6 ? IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH)) { - ifa.prefix = ipa_and(ifa.ip, ipa_mkmask(masklen)); + net_fill_ipa(&ifa.prefix, ifa.ip, masklen); + net_normalize(&ifa.prefix); - if (masklen == (BITS_PER_IP_ADDRESS - 1)) + if (masklen == ((ipv6 ? IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH) - 1)) ifa.opposite = ipa_opposite_m1(ifa.ip); -#ifndef IPV6 - if (masklen == (BITS_PER_IP_ADDRESS - 2)) + if ((!ipv6) && (masklen == IP4_MAX_PREFIX_LENGTH - 2)) ifa.opposite = ipa_opposite_m2(ifa.ip); -#endif if (iface->flags & IF_BROADCAST) ifa.brd = ibrd; @@ -702,12 +764,13 @@ krt_read_addr(struct ks_msg *msg, int scan) } else if (!(iface->flags & IF_MULTIACCESS) && ipa_nonzero(ibrd)) { - ifa.prefix = ifa.opposite = ibrd; + net_fill_ipa(&ifa.prefix, ibrd, (ipv6 ? IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH)); + ifa.opposite = ibrd; ifa.flags |= IA_PEER; } else { - ifa.prefix = ifa.ip; + net_fill_ipa(&ifa.prefix, ifa.ip, (ipv6 ? IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH)); ifa.flags |= IA_HOST; } @@ -804,7 +867,7 @@ krt_sysctl_scan(struct proto *p, int cmd, int table_id) mib[0] = CTL_NET; mib[1] = PF_ROUTE; mib[2] = 0; - mib[3] = BIRD_AF; + mib[3] = 0; // Set AF to 0 for all available families mib[4] = cmd; mib[5] = 0; mcnt = 6; @@ -948,6 +1011,7 @@ krt_sock_open(pool *pool, void *data, int table_id UNUSED) return sk; } +static u32 krt_table_cf[(KRT_MAX_TABLES+31) / 32][2]; #ifdef KRT_SHARED_SOCKET @@ -979,7 +1043,17 @@ krt_sock_close_shared(void) int krt_sys_start(struct krt_proto *p) { - krt_table_map[KRT_CF->sys.table_id] = p; + int id = KRT_CF->sys.table_id; + + if (krt_table_cf[id/32][!!(p->af == AF_INET6)] & (1 << (id%32))) + { + log(L_ERR "%s: Multiple kernel syncers defined for table #%d", p->p.name, id); + return 0; + } + + krt_table_cf[id/32][!!(p->af == AF_INET6)] |= (1 << (id%32)); + + krt_table_map[KRT_CF->sys.table_id][!!(p->af == AF_INET6)] = p; krt_sock_open_shared(); p->sys.sk = krt_sock; @@ -990,10 +1064,12 @@ krt_sys_start(struct krt_proto *p) void krt_sys_shutdown(struct krt_proto *p) { + krt_table_cf[(KRT_CF->sys.table_id)/32][!!(p->af == AF_INET6)] &= ~(1 << ((KRT_CF->sys.table_id)%32)); + krt_sock_close_shared(); p->sys.sk = NULL; - krt_table_map[KRT_CF->sys.table_id] = NULL; + krt_table_map[KRT_CF->sys.table_id][!!(p->af == AF_INET6)] = NULL; krt_buffer_release(&p->p); } @@ -1003,6 +1079,16 @@ krt_sys_shutdown(struct krt_proto *p) int krt_sys_start(struct krt_proto *p) { + int id = KRT_CF->sys.table_id; + + if (krt_table_cf[id/32][!!(p->af == AF_INET6)] & (1 << (id%32))) + { + log(L_ERR "%s: Multiple kernel syncers defined for table #%d", p->p.name, id); + return 0; + } + + krt_table_cf[id/32][!!(p->af == AF_INET6)] |= (1 << (id%32)); + p->sys.sk = krt_sock_open(p->p.pool, p, KRT_CF->sys.table_id); return 1; } @@ -1010,6 +1096,8 @@ krt_sys_start(struct krt_proto *p) void krt_sys_shutdown(struct krt_proto *p) { + krt_table_cf[(KRT_CF->sys.table_id)/32][!!(p->af == AF_INET6)] &= ~(1 << ((KRT_CF->sys.table_id)%32)); + rfree(p->sys.sk); p->sys.sk = NULL; @@ -1021,8 +1109,6 @@ krt_sys_shutdown(struct krt_proto *p) /* KRT configuration callbacks */ -static u32 krt_table_cf[(KRT_MAX_TABLES+31) / 32]; - int krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o) { @@ -1036,18 +1122,6 @@ krt_sys_preconfig(struct config *c UNUSED) bzero(&krt_table_cf, sizeof(krt_table_cf)); } -void -krt_sys_postconfig(struct krt_config *x) -{ - u32 *tbl = krt_table_cf; - int id = x->sys.table_id; - - if (tbl[id/32] & (1 << (id%32))) - cf_error("Multiple kernel syncers defined for table #%d", id); - - tbl[id/32] |= (1 << (id%32)); -} - void krt_sys_init_config(struct krt_config *c) { c->sys.table_id = 0; /* Default table */ @@ -1072,13 +1146,11 @@ kif_sys_shutdown(struct kif_proto *p) krt_buffer_release(&p->p); } - -struct ifa * -kif_get_primary_ip(struct iface *i UNUSED6) +int +kif_update_sysdep_addr(struct iface *i) { -#ifndef IPV6 static int fd = -1; - + if (fd < 0) fd = socket(AF_INET, SOCK_DGRAM, 0); @@ -1088,20 +1160,10 @@ kif_get_primary_ip(struct iface *i UNUSED6) int rv = ioctl(fd, SIOCGIFADDR, (char *) &ifr); if (rv < 0) - return NULL; - - ip_addr addr; - struct sockaddr_in *sin = (struct sockaddr_in *) &ifr.ifr_addr; - memcpy(&addr, &sin->sin_addr.s_addr, sizeof(ip_addr)); - ipa_ntoh(addr); + return 0; - struct ifa *a; - WALK_LIST(a, i->addrs) - { - if (ipa_equal(a->ip, addr)) - return a; - } -#endif + ip4_addr old = i->sysdep; + i->sysdep = ipa_to_ip4(ipa_from_sa4(&ifr.ifr_addr)); - return NULL; + return !ip4_equal(i->sysdep, old); } diff --git a/sysdep/bsd/krt-sys.h b/sysdep/bsd/krt-sys.h index 353ffcec..aa6cc72e 100644 --- a/sysdep/bsd/krt-sys.h +++ b/sysdep/bsd/krt-sys.h @@ -31,7 +31,7 @@ static inline void kif_sys_copy_config(struct kif_config *d UNUSED, struct kif_c /* Kernel routes */ -extern int krt_max_tables; +extern uint krt_max_tables; struct krt_params { int table_id; /* Kernel table ID we sync with */ @@ -44,8 +44,9 @@ struct krt_state { static inline void krt_sys_io_init(void) { } static inline void krt_sys_init(struct krt_proto *p UNUSED) { } +static inline void krt_sys_postconfig(struct krt_config *x UNUSED) { } -static inline int krt_sys_get_attr(eattr *a UNUSED, byte *buf UNUSED, int buflen UNUSED) { return 0; } +static inline int krt_sys_get_attr(eattr *a UNUSED, byte *buf UNUSED, int buflen UNUSED) { return GA_UNKNOWN; } #endif diff --git a/sysdep/bsd/setkey.h b/sysdep/bsd/setkey.h index b417faca..3bcd8623 100644 --- a/sysdep/bsd/setkey.h +++ b/sysdep/bsd/setkey.h @@ -11,7 +11,7 @@ #include <netipsec/ipsec.h> #include "nest/bird.h" -#include "lib/unix.h" +#include "sysdep/unix/unix.h" /* @@ -63,7 +63,7 @@ setkey_send(struct sadb_msg *msg, uint len) * operations to implement replace. */ static int -setkey_md5(sockaddr *src, sockaddr *dst, char *passwd, uint type) +setkey_md5(sockaddr *src, sockaddr *dst, uint pxlen, char *passwd, uint type) { uint passwd_len = passwd ? strlen(passwd) : 0; @@ -122,7 +122,7 @@ setkey_md5(sockaddr *src, sockaddr *dst, char *passwd, uint type) saddr->sadb_address_len = PFKEY_UNIT64(len); saddr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; saddr->sadb_address_proto = IPSEC_ULPROTO_ANY; - saddr->sadb_address_prefixlen = MAX_PREFIX_LENGTH; + saddr->sadb_address_prefixlen = pxlen; memcpy(pos + sizeof(struct sadb_address), &src->sa, src->sa.sa_len); pos += len; @@ -132,7 +132,7 @@ setkey_md5(sockaddr *src, sockaddr *dst, char *passwd, uint type) daddr->sadb_address_len = PFKEY_UNIT64(len); daddr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; daddr->sadb_address_proto = IPSEC_ULPROTO_ANY; - daddr->sadb_address_prefixlen = MAX_PREFIX_LENGTH; + daddr->sadb_address_prefixlen = pxlen; memcpy(pos + sizeof(struct sadb_address), &dst->sa, dst->sa.sa_len); pos += len; @@ -152,18 +152,20 @@ sk_set_md5_in_sasp_db(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, sockaddr_fill(&src, s->af, local, ifa, 0); sockaddr_fill(&dst, s->af, remote, ifa, 0); + uint pxlen = (s->af == AF_INET) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; + if (passwd && *passwd) { int len = strlen(passwd); if (len > TCP_KEYLEN_MAX) ERR_MSG("The password for TCP MD5 Signature is too long"); - if (setkey_md5(&src, &dst, passwd, SADB_ADD) < 0) + if (setkey_md5(&src, &dst, pxlen, passwd, SADB_ADD) < 0) ERR_MSG("Cannot add TCP-MD5 password into the IPsec SA/SP database"); } else { - if (setkey_md5(&src, &dst, NULL, SADB_DELETE) < 0) + if (setkey_md5(&src, &dst, pxlen, NULL, SADB_DELETE) < 0) ERR_MSG("Cannot delete TCP-MD5 password from the IPsec SA/SP database"); } return 0; diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h index 9b10e6e8..68296e65 100644 --- a/sysdep/bsd/sysio.h +++ b/sysdep/bsd/sysio.h @@ -38,12 +38,12 @@ */ #define INIT_MREQ4(maddr,ifa) \ - { .imr_multiaddr = ipa_to_in4(maddr), .imr_interface = ipa_to_in4(ifa->addr->ip) } + { .imr_multiaddr = ipa_to_in4(maddr), .imr_interface = ip4_to_in4(ifa->sysdep) } static inline int sk_setup_multicast4(sock *s) { - struct in_addr ifa = ipa_to_in4(s->iface->addr->ip); + struct in_addr ifa = ip4_to_in4(s->iface->sysdep); u8 ttl = s->ttl; u8 n = 0; @@ -201,7 +201,7 @@ sk_prepare_ip_header(sock *s, void *hdr, int dlen) #if defined(__FreeBSD__) #define USE_MD5SIG_SETKEY -#include "lib/setkey.h" +#include "sysdep/bsd/setkey.h" #endif int diff --git a/sysdep/cf/bsd-v6.h b/sysdep/cf/bsd-v6.h deleted file mode 100644 index 745dfba3..00000000 --- a/sysdep/cf/bsd-v6.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Configuration for *BSD based systems (tested on FreeBSD and NetBSD) - * - * (c) 2004 Ondrej Filip <feela@network.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#define IPV6 - -#define CONFIG_AUTO_ROUTES -#define CONFIG_SELF_CONSCIOUS -#define CONFIG_MULTIPLE_TABLES -#define CONFIG_SINGLE_ROUTE - -#define CONFIG_SKIP_MC_BIND -#define CONFIG_NO_IFACE_BIND - -/* -Link: sysdep/unix -Link: sysdep/bsd - */ diff --git a/sysdep/cf/bsd.h b/sysdep/cf/bsd.h index 51beb42b..22c54277 100644 --- a/sysdep/cf/bsd.h +++ b/sysdep/cf/bsd.h @@ -15,6 +15,9 @@ #define CONFIG_NO_IFACE_BIND #define CONFIG_USE_HDRINCL +#define CONFIG_INCLUDE_SYSIO_H "sysdep/bsd/sysio.h" +#define CONFIG_INCLUDE_KRTSYS_H "sysdep/bsd/krt-sys.h" + /* Link: sysdep/unix Link: sysdep/bsd diff --git a/sysdep/cf/linux-v6.h b/sysdep/cf/linux-v6.h deleted file mode 100644 index 09f60377..00000000 --- a/sysdep/cf/linux-v6.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Configuration for Linux based systems running IPv6 - * - * (c) 1998--1999 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#define IPV6 - -#define CONFIG_AUTO_ROUTES -#define CONFIG_SELF_CONSCIOUS -#define CONFIG_MULTIPLE_TABLES -#define CONFIG_ALL_TABLES_AT_ONCE - -#define CONFIG_RESTRICTED_PRIVILEGES - -/* -Link: sysdep/linux -Link: sysdep/unix - */ diff --git a/sysdep/cf/linux.h b/sysdep/cf/linux.h index 9e34f869..3a3a15da 100644 --- a/sysdep/cf/linux.h +++ b/sysdep/cf/linux.h @@ -14,7 +14,16 @@ #define CONFIG_MC_PROPER_SRC #define CONFIG_UNIX_DONTROUTE +#define CONFIG_INCLUDE_SYSIO_H "sysdep/linux/sysio.h" +#define CONFIG_INCLUDE_KRTSYS_H "sysdep/linux/krt-sys.h" + #define CONFIG_RESTRICTED_PRIVILEGES +#define CONFIG_INCLUDE_SYSPRIV_H "sysdep/linux/syspriv.h" + + +#ifndef AF_MPLS +#define AF_MPLS 28 +#endif /* Link: sysdep/linux diff --git a/sysdep/config.h b/sysdep/config.h index e529cd86..a552e6b6 100644 --- a/sysdep/config.h +++ b/sysdep/config.h @@ -6,8 +6,15 @@ #ifndef _BIRD_CONFIG_H_ #define _BIRD_CONFIG_H_ +#define XSTR2(X) #X +#define XSTR1(X) XSTR2(X) + /* BIRD version */ -#define BIRD_VERSION "1.6.3" +#ifdef GIT_LABEL +#define BIRD_VERSION XSTR1(GIT_LABEL) +#else +#define BIRD_VERSION "2.0.1" +#endif /* Include parameters determined by configure script */ #include "sysdep/autoconf.h" diff --git a/sysdep/linux/Makefile b/sysdep/linux/Makefile new file mode 100644 index 00000000..188ac8de --- /dev/null +++ b/sysdep/linux/Makefile @@ -0,0 +1,6 @@ +src := netlink.c +obj := $(src-o-files) +$(all-daemon) +$(conf-y-targets): $(s)netlink.Y + +tests_objs := $(tests_objs) $(src-o-files) diff --git a/sysdep/linux/Modules b/sysdep/linux/Modules deleted file mode 100644 index 940660b6..00000000 --- a/sysdep/linux/Modules +++ /dev/null @@ -1,5 +0,0 @@ -krt-sys.h -netlink.c -netlink.Y -sysio.h -syspriv.h diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 4802897b..4cb51519 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -6,6 +6,7 @@ * Can be freely distributed and used under the terms of the GNU GPL. */ +#include <alloca.h> #include <stdio.h> #include <unistd.h> #include <fcntl.h> @@ -19,9 +20,9 @@ #include "nest/route.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "lib/timer.h" -#include "lib/unix.h" -#include "lib/krt.h" +#include "lib/alloca.h" +#include "sysdep/unix/unix.h" +#include "sysdep/unix/krt.h" #include "lib/socket.h" #include "lib/string.h" #include "lib/hash.h" @@ -32,6 +33,9 @@ #include <linux/netlink.h> #include <linux/rtnetlink.h> +#ifdef HAVE_MPLS_KERNEL +#include <linux/lwtunnel.h> +#endif #ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */ #define MSG_TRUNC 0x20 @@ -49,13 +53,26 @@ #define RTA_TABLE 15 #endif +#ifndef RTA_VIA +#define RTA_VIA 18 +#endif -#ifdef IPV6 -#define krt_ecmp6(X) 1 -#else -#define krt_ecmp6(X) 0 +#ifndef RTA_NEWDST +#define RTA_NEWDST 19 +#endif + +#ifndef RTA_ENCAP_TYPE +#define RTA_ENCAP_TYPE 21 +#endif + +#ifndef RTA_ENCAP +#define RTA_ENCAP 22 #endif +#define krt_ecmp6(p) ((p)->af == AF_INET6) + +const int rt_default_ecmp = 16; + /* * Structure nl_parse_state keeps state of received route processing. Ideally, * we could just independently parse received Netlink messages and immediately @@ -130,7 +147,7 @@ nl_open_sock(struct nl_sock *nl) nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (nl->fd < 0) die("Unable to open rtnetlink socket: %m"); - nl->seq = now; + nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */ nl->rx_buffer = xmalloc(NL_RX_SIZE); nl->last_hdr = NULL; nl->last_size = 0; @@ -307,35 +324,40 @@ static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = { #define BIRD_IFA_MAX (IFA_FLAGS+1) -#ifndef IPV6 static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = { [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) }, [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) }, [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) }, - [IFA_FLAGS] = { 1, 1, sizeof(u32) }, + [IFA_FLAGS] = { 1, 1, sizeof(u32) }, }; -#else + static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = { [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) }, [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) }, [IFA_FLAGS] = { 1, 1, sizeof(u32) }, }; -#endif -#define BIRD_RTA_MAX (RTA_TABLE+1) +#define BIRD_RTA_MAX (RTA_ENCAP+1) -#ifndef IPV6 -static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = { +static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = { [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) }, + [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, + [RTA_ENCAP] = { 1, 0, 0 }, }; -#else -static struct nl_want_attrs mpnh_attr_want6[BIRD_RTA_MAX] = { + +static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = { [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) }, + [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, + [RTA_ENCAP] = { 1, 0, 0 }, +}; + +#ifdef HAVE_MPLS_KERNEL +static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = { + [RTA_DST] = { 1, 0, 0 }, }; #endif -#ifndef IPV6 static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = { [RTA_DST] = { 1, 1, sizeof(ip4_addr) }, [RTA_OIF] = { 1, 1, sizeof(u32) }, @@ -346,8 +368,10 @@ static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = { [RTA_MULTIPATH] = { 1, 0, 0 }, [RTA_FLOW] = { 1, 1, sizeof(u32) }, [RTA_TABLE] = { 1, 1, sizeof(u32) }, + [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, + [RTA_ENCAP] = { 1, 0, 0 }, }; -#else + static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = { [RTA_DST] = { 1, 1, sizeof(ip6_addr) }, [RTA_IIF] = { 1, 1, sizeof(u32) }, @@ -359,6 +383,21 @@ static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = { [RTA_MULTIPATH] = { 1, 0, 0 }, [RTA_FLOW] = { 1, 1, sizeof(u32) }, [RTA_TABLE] = { 1, 1, sizeof(u32) }, + [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, + [RTA_ENCAP] = { 1, 0, 0 }, +}; + +#ifdef HAVE_MPLS_KERNEL +static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = { + [RTA_DST] = { 1, 1, sizeof(u32) }, + [RTA_IIF] = { 1, 1, sizeof(u32) }, + [RTA_OIF] = { 1, 1, sizeof(u32) }, + [RTA_PRIORITY] = { 1, 1, sizeof(u32) }, + [RTA_METRICS] = { 1, 0, 0 }, + [RTA_FLOW] = { 1, 1, sizeof(u32) }, + [RTA_TABLE] = { 1, 1, sizeof(u32) }, + [RTA_VIA] = { 1, 0, 0 }, + [RTA_NEWDST] = { 1, 0, 0 }, }; #endif @@ -376,7 +415,7 @@ nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size)) { - log(L_ERR "nl_parse_attrs: Malformed message received"); + log(L_ERR "nl_parse_attrs: Malformed attribute received"); return 0; } @@ -392,6 +431,9 @@ nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, return 1; } +static inline u16 rta_get_u16(struct rtattr *a) +{ return *(u16 *) RTA_DATA(a); } + static inline u32 rta_get_u32(struct rtattr *a) { return *(u32 *) RTA_DATA(a); } @@ -401,6 +443,34 @@ static inline ip4_addr rta_get_ip4(struct rtattr *a) static inline ip6_addr rta_get_ip6(struct rtattr *a) { return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); } +static inline ip_addr rta_get_ipa(struct rtattr *a) +{ + if (RTA_PAYLOAD(a) == sizeof(ip4_addr)) + return ipa_from_ip4(rta_get_ip4(a)); + else + return ipa_from_ip6(rta_get_ip6(a)); +} + +#ifdef HAVE_MPLS_KERNEL +static inline ip_addr rta_get_via(struct rtattr *a) +{ + struct rtvia *v = RTA_DATA(a); + switch(v->rtvia_family) { + case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr)); + case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr)); + } + return IPA_NONE; +} + +static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK]; +static inline int rta_get_mpls(struct rtattr *a, u32 *stack) +{ + if (RTA_PAYLOAD(a) % 4) + log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a)); + + return mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack); +} +#endif struct rtattr * nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen) @@ -422,31 +492,92 @@ nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint return a; } +static inline struct rtattr * +nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code) +{ + return nl_add_attr(h, bufsize, code, NULL, 0); +} + +static inline void +nl_close_attr(struct nlmsghdr *h, struct rtattr *a) +{ + a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a; +} + +static inline void +nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data) +{ + nl_add_attr(h, bufsize, code, &data, 2); +} + static inline void -nl_add_attr_u32(struct nlmsghdr *h, unsigned bufsize, int code, u32 data) +nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data) { nl_add_attr(h, bufsize, code, &data, 4); } static inline void -nl_add_attr_ipa(struct nlmsghdr *h, unsigned bufsize, int code, ip_addr ipa) +nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4) { - ipa_hton(ipa); - nl_add_attr(h, bufsize, code, &ipa, sizeof(ipa)); + ip4 = ip4_hton(ip4); + nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4)); } -static inline struct rtattr * -nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code) +static inline void +nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6) { - return nl_add_attr(h, bufsize, code, NULL, 0); + ip6 = ip6_hton(ip6); + nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6)); } static inline void -nl_close_attr(struct nlmsghdr *h, struct rtattr *a) +nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa) { - a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a; + if (ipa_is_ip4(ipa)) + nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa)); + else + nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa)); } +#ifdef HAVE_MPLS_KERNEL +static inline void +nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack) +{ + char buf[len*4]; + mpls_put(buf, len, stack); + nl_add_attr(h, bufsize, code, buf, len*4); +} + +static inline void +nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack) +{ + nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS); + + struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP); + nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack); + nl_close_attr(h, nest); +} + +static inline void +nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa) +{ + struct rtvia *via = alloca(sizeof(struct rtvia) + 16); + + if (ipa_is_ip4(ipa)) + { + via->rtvia_family = AF_INET; + put_ip4(via->rtvia_addr, ipa_to_ip4(ipa)); + nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 4); + } + else + { + via->rtvia_family = AF_INET6; + put_ip6(via->rtvia_addr, ipa_to_ip6(ipa)); + nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 16); + } +} +#endif + static inline struct rtnexthop * nl_open_nexthop(struct nlmsghdr *h, uint bufsize) { @@ -467,8 +598,30 @@ nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh) nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh; } +static inline void +nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED) +{ +#ifdef HAVE_MPLS_KERNEL + if (nh->labels > 0) + if (af == AF_MPLS) + nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label); + else + nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label); + + if (ipa_nonzero(nh->gw)) + if (af == AF_MPLS) + nl_add_attr_via(h, bufsize, nh->gw); + else + nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw); +#else + + if (ipa_nonzero(nh->gw)) + nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw); +#endif +} + static void -nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh) +nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af) { struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH); @@ -480,7 +633,10 @@ nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh) rtnh->rtnh_hops = nh->weight; rtnh->rtnh_ifindex = nh->iface->index; - nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw); + nl_add_nexthop(h, bufsize, nh, af); + + if (nh->flags & RNF_ONLINK) + rtnh->rtnh_flags |= RTNH_F_ONLINK; nl_close_nexthop(h, rtnh); } @@ -488,22 +644,16 @@ nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh) nl_close_attr(h, a); } -static struct mpnh * -nl_parse_multipath(struct krt_proto *p, struct rtattr *ra, int af) +static struct nexthop * +nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af) { - /* Temporary buffer for multicast nexthops */ - static struct mpnh *nh_buffer; - static int nh_buf_size; /* in number of structures */ - static int nh_buf_used; - struct rtattr *a[BIRD_RTA_MAX]; struct rtnexthop *nh = RTA_DATA(ra); - struct mpnh *rv, *first, **last; + struct nexthop *rv, *first, **last; unsigned len = RTA_PAYLOAD(ra); first = NULL; last = &first; - nh_buf_used = 0; while (len) { @@ -511,13 +661,7 @@ nl_parse_multipath(struct krt_proto *p, struct rtattr *ra, int af) if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) return NULL; - if (nh_buf_used == nh_buf_size) - { - nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4; - nh_buffer = xrealloc(nh_buffer, nh_buf_size * sizeof(struct mpnh)); - } - *last = rv = nh_buffer + nh_buf_used++; - rv->next = NULL; + *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE); last = &(rv->next); rv->weight = nh->rtnh_hops; @@ -529,33 +673,52 @@ nl_parse_multipath(struct krt_proto *p, struct rtattr *ra, int af) nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0); switch (af) { -#ifndef IPV6 case AF_INET: - if (!nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want4, a, sizeof(a))) + if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a))) return NULL; break; -#else + case AF_INET6: - if (!nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want6, a, sizeof(a))) + if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a))) return NULL; break; -#endif + default: return NULL; } if (a[RTA_GATEWAY]) { - memcpy(&rv->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(rv->gw)); - ipa_ntoh(rv->gw); + rv->gw = rta_get_ipa(a[RTA_GATEWAY]); + + if (nh->rtnh_flags & RTNH_F_ONLINK) + rv->flags |= RNF_ONLINK; - neighbor *ng = neigh_find2(&p->p, &rv->gw, rv->iface, - (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0); - if (!ng || (ng->scope == SCOPE_HOST)) + neighbor *nbr; + nbr = neigh_find2(&p->p, &rv->gw, rv->iface, + (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0); + if (!nbr || (nbr->scope == SCOPE_HOST)) return NULL; } else - return NULL; + rv->gw = IPA_NONE; + +#ifdef HAVE_MPLS_KERNEL + if (a[RTA_ENCAP_TYPE]) + { + if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) { + log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE])); + return NULL; + } + + struct rtattr *enca[BIRD_RTA_MAX]; + nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); + nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); + rv->labels = rta_get_mpls(enca[RTA_DST], rv->label); + break; + } +#endif + len -= NLMSG_ALIGN(nh->rtnh_len); nh = RTNH_NEXT(nh); @@ -692,52 +855,131 @@ nl_parse_link(struct nlmsghdr *h, int scan) } static void -nl_parse_addr(struct nlmsghdr *h, int scan) +nl_parse_addr4(struct ifaddrmsg *i, int scan, int new) { - struct ifaddrmsg *i; struct rtattr *a[BIRD_IFA_MAX]; - int new = h->nlmsg_type == RTM_NEWADDR; - struct ifa ifa; struct iface *ifi; - int scope; u32 ifa_flags; + int scope; - if (!(i = nl_checkin(h, sizeof(*i)))) + if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a))) return; - switch (i->ifa_family) + if (!a[IFA_LOCAL]) { -#ifndef IPV6 - case AF_INET: - if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a))) - return; - if (!a[IFA_LOCAL]) - { - log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)"); - return; - } - break; -#else - case AF_INET6: - if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a))) - return; - break; -#endif - default: - return; + log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)"); + return; } - if (!a[IFA_ADDRESS]) { log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)"); return; } + ifi = if_find_by_index(i->ifa_index); + if (!ifi) + { + log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index); + return; + } + if (a[IFA_FLAGS]) ifa_flags = rta_get_u32(a[IFA_FLAGS]); else ifa_flags = i->ifa_flags; + struct ifa ifa; + bzero(&ifa, sizeof(ifa)); + ifa.iface = ifi; + if (ifa_flags & IFA_F_SECONDARY) + ifa.flags |= IA_SECONDARY; + + ifa.ip = rta_get_ipa(a[IFA_LOCAL]); + + if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH) + { + log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen); + new = 0; + } + if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH) + { + ifa.brd = rta_get_ipa(a[IFA_ADDRESS]); + net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen); + + /* It is either a host address or a peer address */ + if (ipa_equal(ifa.ip, ifa.brd)) + ifa.flags |= IA_HOST; + else + { + ifa.flags |= IA_PEER; + ifa.opposite = ifa.brd; + } + } + else + { + net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen); + net_normalize(&ifa.prefix); + + if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1) + ifa.opposite = ipa_opposite_m1(ifa.ip); + + if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2) + ifa.opposite = ipa_opposite_m2(ifa.ip); + + if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST]) + { + ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]); + ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen))); + + if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd)) + ifa.brd = ipa_from_ip4(xbrd); + else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */ + { + log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name); + ifa.brd = ipa_from_ip4(ybrd); + } + } + } + + scope = ipa_classify(ifa.ip); + if (scope < 0) + { + log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name); + return; + } + ifa.scope = scope & IADDR_SCOPE_MASK; + + DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n", + ifi->index, ifi->name, + new ? "added" : "removed", + ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite); + + if (new) + ifa_update(&ifa); + else + ifa_delete(&ifa); + + if (!scan) + if_end_partial_update(ifi); +} + +static void +nl_parse_addr6(struct ifaddrmsg *i, int scan, int new) +{ + struct rtattr *a[BIRD_IFA_MAX]; + struct iface *ifi; + u32 ifa_flags; + int scope; + + if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a))) + return; + + if (!a[IFA_ADDRESS]) + { + log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)"); + return; + } + ifi = if_find_by_index(i->ifa_index); if (!ifi) { @@ -745,65 +987,50 @@ nl_parse_addr(struct nlmsghdr *h, int scan) return; } + if (a[IFA_FLAGS]) + ifa_flags = rta_get_u32(a[IFA_FLAGS]); + else + ifa_flags = i->ifa_flags; + + struct ifa ifa; bzero(&ifa, sizeof(ifa)); ifa.iface = ifi; if (ifa_flags & IFA_F_SECONDARY) ifa.flags |= IA_SECONDARY; -#ifdef IPV6 /* Ignore tentative addresses silently */ if (ifa_flags & IFA_F_TENTATIVE) return; -#endif /* IFA_LOCAL can be unset for IPv6 interfaces */ - memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip)); - ipa_ntoh(ifa.ip); - ifa.pxlen = i->ifa_prefixlen; - if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS) + ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]); + + if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH) { log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen); new = 0; } - if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS) + if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH) { - ip_addr addr; - memcpy(&addr, RTA_DATA(a[IFA_ADDRESS]), sizeof(addr)); - ipa_ntoh(addr); - ifa.prefix = ifa.brd = addr; + ifa.brd = rta_get_ipa(a[IFA_ADDRESS]); + net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen); /* It is either a host address or a peer address */ - if (ipa_equal(ifa.ip, addr)) + if (ipa_equal(ifa.ip, ifa.brd)) ifa.flags |= IA_HOST; else { ifa.flags |= IA_PEER; - ifa.opposite = addr; + ifa.opposite = ifa.brd; } } else { - ip_addr netmask = ipa_mkmask(ifa.pxlen); - ifa.prefix = ipa_and(ifa.ip, netmask); - ifa.brd = ipa_or(ifa.ip, ipa_not(netmask)); - if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 1) - ifa.opposite = ipa_opposite_m1(ifa.ip); + net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen); + net_normalize(&ifa.prefix); -#ifndef IPV6 - if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 2) - ifa.opposite = ipa_opposite_m2(ifa.ip); - - if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST]) - { - ip_addr xbrd; - memcpy(&xbrd, RTA_DATA(a[IFA_BROADCAST]), sizeof(xbrd)); - ipa_ntoh(xbrd); - if (ipa_equal(xbrd, ifa.prefix) || ipa_equal(xbrd, ifa.brd)) - ifa.brd = xbrd; - else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */ - log(L_ERR "KIF: Invalid broadcast address %I for %s", xbrd, ifi->name); - } -#endif + if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1) + ifa.opposite = ipa_opposite_m1(ifa.ip); } scope = ipa_classify(ifa.ip); @@ -814,10 +1041,10 @@ nl_parse_addr(struct nlmsghdr *h, int scan) } ifa.scope = scope & IADDR_SCOPE_MASK; - DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n", + DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n", ifi->index, ifi->name, new ? "added" : "removed", - ifa.ip, ifa.flags, ifa.prefix, ifa.pxlen, ifa.brd, ifa.opposite); + ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite); if (new) ifa_update(&ifa); @@ -828,6 +1055,26 @@ nl_parse_addr(struct nlmsghdr *h, int scan) if_end_partial_update(ifi); } +static void +nl_parse_addr(struct nlmsghdr *h, int scan) +{ + struct ifaddrmsg *i; + + if (!(i = nl_checkin(h, sizeof(*i)))) + return; + + int new = (h->nlmsg_type == RTM_NEWADDR); + + switch (i->ifa_family) + { + case AF_INET: + return nl_parse_addr4(i, scan, new); + + case AF_INET6: + return nl_parse_addr6(i, scan, new); + } +} + void kif_do_scan(struct kif_proto *p UNUSED) { @@ -862,7 +1109,14 @@ kif_do_scan(struct kif_proto *p UNUSED) } } - nl_request_dump(BIRD_AF, RTM_GETADDR); + nl_request_dump(AF_INET, RTM_GETADDR); + while (h = nl_get_scan()) + if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) + nl_parse_addr(h, 1); + else + log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type); + + nl_request_dump(AF_INET6, RTM_GETADDR); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) nl_parse_addr(h, 1); @@ -884,10 +1138,10 @@ krt_table_id(struct krt_proto *p) static HASH(struct krt_proto) nl_table_map; -#define RTH_FN(k) u32_hash(k) -#define RTH_EQ(k1,k2) k1 == k2 -#define RTH_KEY(p) krt_table_id(p) -#define RTH_NEXT(p) p->sys.hash_next +#define RTH_KEY(p) p->af, krt_table_id(p) +#define RTH_NEXT(p) p->sys.hash_next +#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2 +#define RTH_FN(a,i) a ^ u32_hash(i) #define RTH_REHASH rth_rehash #define RTH_PARAMS /8, *2, 2, 2, 6, 20 @@ -899,28 +1153,21 @@ krt_capable(rte *e) { rta *a = e->attrs; - if (a->cast != RTC_UNICAST) - return 0; - switch (a->dest) - { - case RTD_ROUTER: - case RTD_DEVICE: - if (a->iface == NULL) - return 0; + { + case RTD_UNICAST: case RTD_BLACKHOLE: case RTD_UNREACHABLE: case RTD_PROHIBIT: - case RTD_MULTIPATH: - break; + return 1; + default: return 0; - } - return 1; + } } static inline int -nh_bufsize(struct mpnh *nh) +nh_bufsize(struct nexthop *nh) { int rv = 0; for (; nh != NULL; nh = nh->next) @@ -929,32 +1176,52 @@ nh_bufsize(struct mpnh *nh) } static int -nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, struct iface *iface) +nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, struct nexthop *nh) { eattr *ea; net *net = e->net; rta *a = e->attrs; + int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh)); u32 priority = 0; struct { struct nlmsghdr h; struct rtmsg r; - char buf[128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops)]; - } r; - - DBG("nl_send_route(%I/%d,op=%x)\n", net->n.prefix, net->n.pxlen, op); - - bzero(&r.h, sizeof(r.h)); - bzero(&r.r, sizeof(r.r)); - r.h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE; - r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); - r.h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK; - - r.r.rtm_family = BIRD_AF; - r.r.rtm_dst_len = net->n.pxlen; - r.r.rtm_protocol = RTPROT_BIRD; - r.r.rtm_scope = RT_SCOPE_NOWHERE; - nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix); + char buf[0]; + } *r; + + int rsize = sizeof(*r) + bufsize; + r = alloca(rsize); + + DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op); + + bzero(&r->h, sizeof(r->h)); + bzero(&r->r, sizeof(r->r)); + r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE; + r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK; + + r->r.rtm_family = p->af; + r->r.rtm_dst_len = net_pxlen(net->n.addr); + r->r.rtm_protocol = RTPROT_BIRD; + r->r.rtm_scope = RT_SCOPE_NOWHERE; +#ifdef HAVE_MPLS_KERNEL + if (p->af == AF_MPLS) + { + /* + * Kernel MPLS code is a bit picky. We must: + * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE) + * 2) Never use RTA_PRIORITY + */ + + u32 label = net_mpls(net->n.addr); + nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label); + r->r.rtm_scope = RT_SCOPE_UNIVERSE; + r->r.rtm_type = RTN_UNICAST; + } + else +#endif + nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr)); /* * Strange behavior for RTM_DELROUTE: @@ -964,11 +1231,13 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int d */ if (krt_table_id(p) < 256) - r.r.rtm_table = krt_table_id(p); + r->r.rtm_table = krt_table_id(p); else - nl_add_attr_u32(&r.h, sizeof(r), RTA_TABLE, krt_table_id(p)); + nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p)); - if (a->source == RTS_DUMMY) + if (p->af == AF_MPLS) + priority = 0; + else if (a->source == RTS_DUMMY) priority = e->u.krt.metric; else if (KRT_CF->sys.metric) priority = KRT_CF->sys.metric; @@ -976,23 +1245,25 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int d priority = ea->u.data; if (priority) - nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, priority); + nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority); /* For route delete, we do not specify remaining route attributes */ if (op == NL_OP_DELETE) goto dest; /* Default scope is LINK for device routes, UNIVERSE otherwise */ - if (ea = ea_find(eattrs, EA_KRT_SCOPE)) - r.r.rtm_scope = ea->u.data; + if (p->af == AF_MPLS) + r->r.rtm_scope = RT_SCOPE_UNIVERSE; + else if (ea = ea_find(eattrs, EA_KRT_SCOPE)) + r->r.rtm_scope = ea->u.data; else - r.r.rtm_scope = (dest == RTD_DEVICE) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; if (ea = ea_find(eattrs, EA_KRT_PREFSRC)) - nl_add_attr_ipa(&r.h, sizeof(r), RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); + nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); if (ea = ea_find(eattrs, EA_KRT_REALM)) - nl_add_attr_u32(&r.h, sizeof(r), RTA_FLOW, ea->u.data); + nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data); u32 metrics[KRT_METRICS_MAX]; @@ -1007,34 +1278,33 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int d } if (metrics[0]) - nl_add_metrics(&r.h, sizeof(r), metrics, KRT_METRICS_MAX); + nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX); dest: - /* a->iface != NULL checked in krt_capable() for router and device routes */ switch (dest) { - case RTD_ROUTER: - r.r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index); - nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, gw); - break; - case RTD_DEVICE: - r.r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index); + case RTD_UNICAST: + r->r.rtm_type = RTN_UNICAST; + if (nh->next && !krt_ecmp6(p)) + nl_add_multipath(&r->h, rsize, nh, p->af); + else + { + nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index); + nl_add_nexthop(&r->h, rsize, nh, p->af); + + if (nh->flags & RNF_ONLINK) + r->r.rtm_flags |= RTNH_F_ONLINK; + } break; case RTD_BLACKHOLE: - r.r.rtm_type = RTN_BLACKHOLE; + r->r.rtm_type = RTN_BLACKHOLE; break; case RTD_UNREACHABLE: - r.r.rtm_type = RTN_UNREACHABLE; + r->r.rtm_type = RTN_UNREACHABLE; break; case RTD_PROHIBIT: - r.r.rtm_type = RTN_PROHIBIT; - break; - case RTD_MULTIPATH: - r.r.rtm_type = RTN_UNICAST; - nl_add_multipath(&r.h, sizeof(r), a->nexthops); + r->r.rtm_type = RTN_PROHIBIT; break; case RTD_NONE: break; @@ -1043,7 +1313,7 @@ dest: } /* Ignore missing for DELETE */ - return nl_exchange(&r.h, (op == NL_OP_DELETE)); + return nl_exchange(&r->h, (op == NL_OP_DELETE)); } static inline int @@ -1052,21 +1322,21 @@ nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs) rta *a = e->attrs; int err = 0; - if (krt_ecmp6(p) && (a->dest == RTD_MULTIPATH)) + if (krt_ecmp6(p) && a->nh.next) { - struct mpnh *nh = a->nexthops; + struct nexthop *nh = &(a->nh); - err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface); + err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_UNICAST, nh); if (err < 0) return err; for (nh = nh->next; nh; nh = nh->next) - err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface); + err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_UNICAST, nh); return err; } - return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface); + return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, &(a->nh)); } static inline int @@ -1076,7 +1346,7 @@ nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs) /* For IPv6, we just repeatedly request DELETE until we get error */ do - err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL); + err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, NULL); while (krt_ecmp6(p) && !err); return err; @@ -1110,20 +1380,6 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list n->n.flags &= ~KRF_SYNC_ERROR; } - -static inline struct mpnh * -nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight) -{ - struct mpnh *nh = lp_alloc(s->pool, sizeof(struct mpnh)); - - nh->gw = gw; - nh->iface = iface; - nh->next = NULL; - nh->weight = weight; - - return nh; -} - static int nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type) { @@ -1191,9 +1447,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) struct rtattr *a[BIRD_RTA_MAX]; int new = h->nlmsg_type == RTM_NEWROUTE; - ip_addr dst = IPA_NONE; + net_addr dst; u32 oif = ~0; - u32 table; + u32 table_id; u32 priority = 0; u32 def_scope = RT_SCOPE_UNIVERSE; int src; @@ -1203,47 +1459,63 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) switch (i->rtm_family) { -#ifndef IPV6 - case AF_INET: - if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a))) - return; - break; -#else - case AF_INET6: - if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a))) - return; - break; -#endif - default: + case AF_INET: + if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a))) return; - } - if (a[RTA_DST]) - { - memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst)); - ipa_ntoh(dst); + if (a[RTA_DST]) + net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len); + else + net_fill_ip4(&dst, IP4_NONE, 0); + break; + + case AF_INET6: + if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a))) + return; + + if (a[RTA_DST]) + net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len); + else + net_fill_ip6(&dst, IP6_NONE, 0); + break; + +#ifdef HAVE_MPLS_KERNEL + case AF_MPLS: + if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a))) + return; + + if (!a[RTA_DST]) + SKIP("MPLS route without RTA_DST"); + + if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1) + SKIP("MPLS route with multi-label RTA_DST"); + + net_fill_mpls(&dst, rta_mpls_stack[0]); + break; +#endif + + default: + return; } if (a[RTA_OIF]) oif = rta_get_u32(a[RTA_OIF]); if (a[RTA_TABLE]) - table = rta_get_u32(a[RTA_TABLE]); + table_id = rta_get_u32(a[RTA_TABLE]); else - table = i->rtm_table; + table_id = i->rtm_table; - p = HASH_FIND(nl_table_map, RTH, table); /* Do we know this table? */ - DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, table, i->rtm_protocol, p ? p->p.name : "(none)"); + /* Do we know this table? */ + p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id); if (!p) SKIP("unknown table %d\n", table); -#ifdef IPV6 if (a[RTA_IIF]) SKIP("IIF set\n"); -#else + if (i->rtm_tos != 0) /* We don't support TOS */ SKIP("TOS %02x\n", i->rtm_tos); -#endif if (s->scan && !new) SKIP("RTM_DELROUTE in scan\n"); @@ -1251,7 +1523,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (a[RTA_PRIORITY]) priority = rta_get_u32(a[RTA_PRIORITY]); - int c = ipa_classify_net(dst); + int c = net_classify(&dst); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) SKIP("strange class/scope\n"); @@ -1279,70 +1551,72 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) src = KRT_SRC_ALIEN; } - net *net = net_get(p->p.table, dst, i->rtm_dst_len); + net *net = net_get(p->p.main_channel->table, &dst); if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type)) nl_announce_route(s); - rta *ra = lp_allocz(s->pool, sizeof(rta)); + rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE); ra->src = p->p.main_source; ra->source = RTS_INHERIT; ra->scope = SCOPE_UNIVERSE; - ra->cast = RTC_UNICAST; switch (i->rtm_type) { case RTN_UNICAST: + ra->dest = RTD_UNICAST; if (a[RTA_MULTIPATH]) - { - ra->dest = RTD_MULTIPATH; - ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH], i->rtm_family); - if (!ra->nexthops) + { + struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family); + if (!nh) { - log(L_ERR "KRT: Received strange multipath route %I/%d", - net->n.prefix, net->n.pxlen); + log(L_ERR "KRT: Received strange multipath route %N", net->n.addr); return; } + ra->nh = *nh; break; } - ra->iface = if_find_by_index(oif); - if (!ra->iface) + ra->nh.iface = if_find_by_index(oif); + if (!ra->nh.iface) { - log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u", - net->n.prefix, net->n.pxlen, oif); + log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif); return; } - if (a[RTA_GATEWAY]) + if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY] +#ifdef HAVE_MPLS_KERNEL + || (i->rtm_family == AF_MPLS) && a[RTA_VIA] +#endif + ) { - neighbor *ng; - ra->dest = RTD_ROUTER; - memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw)); - ipa_ntoh(ra->gw); +#ifdef HAVE_MPLS_KERNEL + if (i->rtm_family == AF_MPLS) + ra->nh.gw = rta_get_via(a[RTA_VIA]); + else +#endif + ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]); -#ifdef IPV6 /* Silently skip strange 6to4 routes */ - if (ipa_in_net(ra->gw, IPA_NONE, 96)) + const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96); + if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit)) return; -#endif - ng = neigh_find2(&p->p, &ra->gw, ra->iface, - (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0); - if (!ng || (ng->scope == SCOPE_HOST)) + if (i->rtm_flags & RTNH_F_ONLINK) + ra->nh.flags |= RNF_ONLINK; + + neighbor *nbr; + nbr = neigh_find2(&p->p, &(ra->nh.gw), ra->nh.iface, + (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); + if (!nbr || (nbr->scope == SCOPE_HOST)) { - log(L_ERR "KRT: Received route %I/%d with strange next-hop %I", - net->n.prefix, net->n.pxlen, ra->gw); + log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, + ra->nh.gw); return; } } - else - { - ra->dest = RTD_DEVICE; - def_scope = RT_SCOPE_LINK; - } break; case RTN_BLACKHOLE: @@ -1360,6 +1634,38 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) return; } +#ifdef HAVE_MPLS_KERNEL + int labels = 0; + if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next) + labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label); + + if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next) + { + switch (rta_get_u16(a[RTA_ENCAP_TYPE])) + { + case LWTUNNEL_ENCAP_MPLS: + { + struct rtattr *enca[BIRD_RTA_MAX]; + nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); + nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); + labels = rta_get_mpls(enca[RTA_DST], ra->nh.label); + break; + } + default: + SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE])); + break; + } + } + + if (labels < 0) + { + log(L_WARN "KRT: Too long MPLS stack received, ignoring."); + ra->nh.labels = 0; + } + else + ra->nh.labels = labels; +#endif + if (i->rtm_scope != def_scope) { ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); @@ -1375,9 +1681,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (a[RTA_PREFSRC]) { - ip_addr ps; - memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps)); - ipa_ntoh(ps); + ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); ea->next = ra->eattrs; @@ -1413,8 +1717,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0) { - log(L_ERR "KRT: Received route %I/%d with strange RTA_METRICS attribute", - net->n.prefix, net->n.pxlen); + log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr); return; } @@ -1441,8 +1744,8 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) * Ideally, now we would send the received route to the rest of kernel code. * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we * postpone it and merge next hops until the end of the sequence. Note that - * proper multipath updates are rejected by nl_mergable_route(), so it is - * always the first case for them. + * when doing merging of next hops, we expect the new route to be unipath. + * Otherwise, we ignore additional next hops in nexthop_insert(). */ if (!s->net) @@ -1460,15 +1763,20 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) else { /* Merge next hops with the stored route */ - rta *a = s->attrs; + rta *oa = s->attrs; - if (a->dest != RTD_MULTIPATH) + struct nexthop *nhs = &oa->nh; + nexthop_insert(&nhs, &ra->nh); + + /* Perhaps new nexthop is inserted at the first position */ + if (nhs == &ra->nh) { - a->dest = RTD_MULTIPATH; - a->nexthops = nl_alloc_mpnh(s, a->gw, a->iface, 0); - } + /* Swap rtas */ + s->attrs = ra; - mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->iface, 0)); + /* Keep old eattrs */ + ra->eattrs = oa->eattrs; + } } } @@ -1478,16 +1786,34 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL struct nlmsghdr *h; struct nl_parse_state s; - nl_parse_begin(&s, 1, krt_ecmp6(p)); + nl_parse_begin(&s, 1, 0); + nl_request_dump(AF_INET, RTM_GETROUTE); + while (h = nl_get_scan()) + if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) + nl_parse_route(&s, h); + else + log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); + nl_parse_end(&s); - nl_request_dump(BIRD_AF, RTM_GETROUTE); + nl_parse_begin(&s, 1, 1); + nl_request_dump(AF_INET6, RTM_GETROUTE); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) nl_parse_route(&s, h); else log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); + nl_parse_end(&s); +#ifdef HAVE_MPLS_KERNEL + nl_parse_begin(&s, 1, 1); + nl_request_dump(AF_MPLS, RTM_GETROUTE); + while (h = nl_get_scan()) + if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) + nl_parse_route(&s, h); + else + log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); nl_parse_end(&s); +#endif } /* @@ -1609,11 +1935,10 @@ nl_open_async(void) bzero(&sa, sizeof(sa)); sa.nl_family = AF_NETLINK; -#ifdef IPV6 - sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE; -#else - sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE; -#endif + sa.nl_groups = RTMGRP_LINK | + RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE | + RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE; + if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) { log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m"); @@ -1640,14 +1965,14 @@ nl_open_async(void) void krt_sys_io_init(void) { - nl_linpool = lp_new(krt_pool, 4080); + nl_linpool = lp_new_default(krt_pool); HASH_INIT(nl_table_map, krt_pool, 6); } int krt_sys_start(struct krt_proto *p) { - struct krt_proto *old = HASH_FIND(nl_table_map, RTH, krt_table_id(p)); + struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p)); if (old) { @@ -1680,7 +2005,7 @@ void krt_sys_init_config(struct krt_config *cf) { cf->sys.table_id = RT_TABLE_MAIN; - cf->sys.metric = 0; + cf->sys.metric = 32; } void @@ -1751,3 +2076,9 @@ void kif_sys_shutdown(struct kif_proto *p UNUSED) { } + +int +kif_update_sysdep_addr(struct iface *i UNUSED) +{ + return 0; +} diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile new file mode 100644 index 00000000..f592399c --- /dev/null +++ b/sysdep/unix/Makefile @@ -0,0 +1,8 @@ +src := io.c krt.c log.c main.c random.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) +$(conf-y-targets): $(s)krt.Y + +src := $(filter-out main.c, $(src)) +tests_objs := $(tests_objs) $(src-o-files) diff --git a/sysdep/unix/Modules b/sysdep/unix/Modules deleted file mode 100644 index 2c6514df..00000000 --- a/sysdep/unix/Modules +++ /dev/null @@ -1,12 +0,0 @@ -log.c -main.c -timer.h -io.c -unix.h -endian.h -config.Y -random.c - -krt.c -krt.h -krt.Y diff --git a/sysdep/unix/config.Y b/sysdep/unix/config.Y index d6ab8cab..ccca4a62 100644 --- a/sysdep/unix/config.Y +++ b/sysdep/unix/config.Y @@ -8,14 +8,13 @@ CF_HDR -#include "lib/unix.h" +#include "sysdep/unix/unix.h" #include <stdio.h> CF_DECLS CF_KEYWORDS(LOG, SYSLOG, ALL, DEBUG, TRACE, INFO, REMOTE, WARNING, ERROR, AUTH, FATAL, BUG, STDERR, SOFT) -CF_KEYWORDS(TIMEFORMAT, ISO, OLD, SHORT, LONG, BASE, NAME, CONFIRM, UNDO, CHECK, TIMEOUT) -CF_KEYWORDS(DEBUG, LATENCY, LIMIT, WATCHDOG, WARNING, TIMEOUT) +CF_KEYWORDS(NAME, CONFIRM, UNDO, CHECK, TIMEOUT, DEBUG, LATENCY, LIMIT, WATCHDOG, WARNING) %type <i> log_mask log_mask_list log_cat cfg_timeout %type <g> log_file @@ -85,28 +84,6 @@ mrtdump_base: ; -CF_ADDTO(conf, timeformat_base) - -timeformat_which: - ROUTE { $$ = &new_config->tf_route; } - | PROTOCOL { $$ = &new_config->tf_proto; } - | BASE { $$ = &new_config->tf_base; } - | LOG { $$ = &new_config->tf_log; } - -timeformat_spec: - timeformat_which TEXT { *$1 = (struct timeformat){$2, NULL, 0}; } - | timeformat_which TEXT expr TEXT { *$1 = (struct timeformat){$2, $4, $3}; } - | timeformat_which ISO SHORT { *$1 = (struct timeformat){"%T", "%F", 20*3600}; } - | timeformat_which ISO LONG { *$1 = (struct timeformat){"%F %T", NULL, 0}; } - | timeformat_which OLD SHORT { *$1 = (struct timeformat){NULL, NULL, 0}; } - | timeformat_which OLD LONG { *$1 = (struct timeformat){"%d-%m-%Y %T", NULL, 0}; } - ; - -timeformat_base: - TIMEFORMAT timeformat_spec ';' - ; - - CF_ADDTO(conf, debug_unix) debug_unix: diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 53a37a50..012deaf0 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -34,14 +34,15 @@ #include "nest/bird.h" #include "lib/lists.h" #include "lib/resource.h" -#include "lib/timer.h" #include "lib/socket.h" #include "lib/event.h" +#include "lib/timer.h" #include "lib/string.h" #include "nest/iface.h" +#include "conf/conf.h" -#include "lib/unix.h" -#include "lib/sysio.h" +#include "sysdep/unix/unix.h" +#include CONFIG_INCLUDE_SYSIO_H /* Maximum number of calls of tx handler for one socket in one * poll iteration. Should be small enough to not monopolize CPU by @@ -101,381 +102,60 @@ tracked_fopen(pool *p, char *name, char *mode) return f; } -/** - * DOC: Timers - * - * Timers are resources which represent a wish of a module to call - * a function at the specified time. The platform dependent code - * doesn't guarantee exact timing, only that a timer function - * won't be called before the requested time. - * - * In BIRD, time is represented by values of the &bird_clock_t type - * which are integral numbers interpreted as a relative number of seconds since - * some fixed time point in past. The current time can be read - * from variable @now with reasonable accuracy and is monotonic. There is also - * a current 'absolute' time in variable @now_real reported by OS. - * - * Each timer is described by a &timer structure containing a pointer - * to the handler function (@hook), data private to this function (@data), - * time the function should be called at (@expires, 0 for inactive timers), - * for the other fields see |timer.h|. - */ - -#define NEAR_TIMER_LIMIT 4 -static list near_timers, far_timers; -static bird_clock_t first_far_timer = TIME_INFINITY; - -/* now must be different from 0, because 0 is a special value in timer->expires */ -bird_clock_t now = 1, now_real, boot_time; - -static void -update_times_plain(void) -{ - bird_clock_t new_time = time(NULL); - int delta = new_time - now_real; - - if ((delta >= 0) && (delta < 60)) - now += delta; - else if (now_real != 0) - log(L_WARN "Time jump, delta %d s", delta); +/* + * Time clock + */ - now_real = new_time; -} +btime boot_time; -static void -update_times_gettime(void) +void +times_init(struct timeloop *loop) { struct timespec ts; int rv; rv = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rv != 0) - die("clock_gettime: %m"); - - if (ts.tv_sec != now) { - if (ts.tv_sec < now) - log(L_ERR "Monotonic timer is broken"); - - now = ts.tv_sec; - now_real = time(NULL); - } -} - -static int clock_monotonic_available; - -static inline void -update_times(void) -{ - if (clock_monotonic_available) - update_times_gettime(); - else - update_times_plain(); -} - -static inline void -init_times(void) -{ - struct timespec ts; - clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0); - if (!clock_monotonic_available) - log(L_WARN "Monotonic timer is missing"); -} - - -static void -tm_free(resource *r) -{ - timer *t = (timer *) r; - - tm_stop(t); -} - -static void -tm_dump(resource *r) -{ - timer *t = (timer *) r; - - debug("(code %p, data %p, ", t->hook, t->data); - if (t->randomize) - debug("rand %d, ", t->randomize); - if (t->recurrent) - debug("recur %d, ", t->recurrent); - if (t->expires) - debug("expires in %d sec)\n", t->expires - now); - else - debug("inactive)\n"); -} - -static struct resclass tm_class = { - "Timer", - sizeof(timer), - tm_free, - tm_dump, - NULL, - NULL -}; - -/** - * tm_new - create a timer - * @p: pool - * - * This function creates a new timer resource and returns - * a pointer to it. To use the timer, you need to fill in - * the structure fields and call tm_start() to start timing. - */ -timer * -tm_new(pool *p) -{ - timer *t = ralloc(p, &tm_class); - return t; -} - -static inline void -tm_insert_near(timer *t) -{ - node *n = HEAD(near_timers); - - while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires)) - n = n->next; - insert_node(&t->n, n->prev); -} - -/** - * tm_start - start a timer - * @t: timer - * @after: number of seconds the timer should be run after - * - * This function schedules the hook function of the timer to - * be called after @after seconds. If the timer has been already - * started, it's @expire time is replaced by the new value. - * - * You can have set the @randomize field of @t, the timeout - * will be increased by a random number of seconds chosen - * uniformly from range 0 .. @randomize. - * - * You can call tm_start() from the handler function of the timer - * to request another run of the timer. Also, you can set the @recurrent - * field to have the timer re-added automatically with the same timeout. - */ -void -tm_start(timer *t, unsigned after) -{ - bird_clock_t when; - - if (t->randomize) - after += random() % (t->randomize + 1); - when = now + after; - if (t->expires == when) - return; - if (t->expires) - rem_node(&t->n); - t->expires = when; - if (after <= NEAR_TIMER_LIMIT) - tm_insert_near(t); - else - { - if (!first_far_timer || first_far_timer > when) - first_far_timer = when; - add_tail(&far_timers, &t->n); - } -} - -/** - * tm_stop - stop a timer - * @t: timer - * - * This function stops a timer. If the timer is already stopped, - * nothing happens. - */ -void -tm_stop(timer *t) -{ - if (t->expires) - { - rem_node(&t->n); - t->expires = 0; - } -} + if (rv < 0) + die("Monotonic clock is missing"); -static void -tm_dump_them(char *name, list *l) -{ - node *n; - timer *t; + if ((ts.tv_sec < 0) || (((s64) ts.tv_sec) > ((s64) 1 << 40))) + log(L_WARN "Monotonic clock is crazy"); - debug("%s timers:\n", name); - WALK_LIST(n, *l) - { - t = SKIP_BACK(timer, n, n); - debug("%p ", t); - tm_dump(&t->r); - } - debug("\n"); + loop->last_time = ts.tv_sec S + ts.tv_nsec NS; + loop->real_time = 0; } void -tm_dump_all(void) -{ - tm_dump_them("Near", &near_timers); - tm_dump_them("Far", &far_timers); -} - -static inline time_t -tm_first_shot(void) +times_update(struct timeloop *loop) { - time_t x = first_far_timer; + struct timespec ts; + int rv; - if (!EMPTY_LIST(near_timers)) - { - timer *t = SKIP_BACK(timer, n, HEAD(near_timers)); - if (t->expires < x) - x = t->expires; - } - return x; -} + rv = clock_gettime(CLOCK_MONOTONIC, &ts); + if (rv < 0) + die("clock_gettime: %m"); -void io_log_event(void *hook, void *data); + btime new_time = ts.tv_sec S + ts.tv_nsec NS; -static void -tm_shot(void) -{ - timer *t; - node *n, *m; + if (new_time < loop->last_time) + log(L_ERR "Monotonic clock is broken"); - if (first_far_timer <= now) - { - bird_clock_t limit = now + NEAR_TIMER_LIMIT; - first_far_timer = TIME_INFINITY; - n = HEAD(far_timers); - while (m = n->next) - { - t = SKIP_BACK(timer, n, n); - if (t->expires <= limit) - { - rem_node(n); - tm_insert_near(t); - } - else if (t->expires < first_far_timer) - first_far_timer = t->expires; - n = m; - } - } - while ((n = HEAD(near_timers)) -> next) - { - int delay; - t = SKIP_BACK(timer, n, n); - if (t->expires > now) - break; - rem_node(n); - delay = t->expires - now; - t->expires = 0; - if (t->recurrent) - { - int i = t->recurrent - delay; - if (i < 0) - i = 0; - tm_start(t, i); - } - io_log_event(t->hook, t->data); - t->hook(t); - } -} - -/** - * tm_parse_datetime - parse a date and time - * @x: datetime string - * - * tm_parse_datetime() takes a textual representation of - * a date and time (dd-mm-yyyy hh:mm:ss) - * and converts it to the corresponding value of type &bird_clock_t. - */ -bird_clock_t -tm_parse_datetime(char *x) -{ - struct tm tm; - int n; - time_t t; - - if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n]) - return tm_parse_date(x); - tm.tm_mon--; - tm.tm_year -= 1900; - t = mktime(&tm); - if (t == (time_t) -1) - return 0; - return t; + loop->last_time = new_time; + loop->real_time = 0; } -/** - * tm_parse_date - parse a date - * @x: date string - * - * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy) - * and converts it to the corresponding value of type &bird_clock_t. - */ -bird_clock_t -tm_parse_date(char *x) -{ - struct tm tm; - int n; - time_t t; - if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n]) - return 0; - tm.tm_mon--; - tm.tm_year -= 1900; - tm.tm_hour = tm.tm_min = tm.tm_sec = 0; - t = mktime(&tm); - if (t == (time_t) -1) - return 0; - return t; -} - -static void -tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta) -{ - static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; - - if (delta < 20*3600) - bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min); - else if (delta < 360*86400) - bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday); - else - bsprintf(x, "%d", tm->tm_year+1900); -} - -#include "conf/conf.h" - -/** - * tm_format_datetime - convert date and time to textual representation - * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE - * @fmt_spec: specification of resulting textual representation of the time - * @t: time - * - * This function formats the given relative time value @t to a textual - * date/time representation (dd-mm-yyyy hh:mm:ss) in real time. - */ void -tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t) +times_update_real_time(struct timeloop *loop) { - const char *fmt_used; - struct tm *tm; - bird_clock_t delta = now - t; - t = now_real - delta; - tm = localtime(&t); - - if (fmt_spec->fmt1 == NULL) - return tm_format_reltime(x, tm, delta); + struct timespec ts; + int rv; - if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit)) - fmt_used = fmt_spec->fmt1; - else - fmt_used = fmt_spec->fmt2; + rv = clock_gettime(CLOCK_REALTIME, &ts); + if (rv < 0) + die("clock_gettime: %m"); - int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm); - if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE)) - strcpy(x, "<too-long>"); + loop->real_time = ts.tv_sec S + ts.tv_nsec NS; } @@ -1071,26 +751,63 @@ sk_free_bufs(sock *s) } } +#ifdef HAVE_LIBSSH +static void +sk_ssh_free(sock *s) +{ + struct ssh_sock *ssh = s->ssh; + + if (s->ssh == NULL) + return; + + s->ssh = NULL; + + if (ssh->channel) + { + if (ssh_channel_is_open(ssh->channel)) + ssh_channel_close(ssh->channel); + ssh_channel_free(ssh->channel); + ssh->channel = NULL; + } + + if (ssh->session) + { + ssh_disconnect(ssh->session); + ssh_free(ssh->session); + ssh->session = NULL; + } +} +#endif + static void sk_free(resource *r) { sock *s = (sock *) r; sk_free_bufs(s); - if (s->fd >= 0) - { - close(s->fd); - /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ - if (s->flags & SKF_THREAD) - return; +#ifdef HAVE_LIBSSH + if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE) + sk_ssh_free(s); +#endif + + if (s->fd < 0) + return; + /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ + if (!(s->flags & SKF_THREAD)) + { if (s == current_sock) current_sock = sk_next(s); if (s == stored_sock) stored_sock = sk_next(s); rem_node(&s->n); } + + if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE) + close(s->fd); + + s->fd = -1; } void @@ -1141,7 +858,7 @@ static void sk_dump(resource *r) { sock *s = (sock *) r; - static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" }; + static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" }; debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n", sk_type_names[s->type], @@ -1192,6 +909,9 @@ sk_setup(sock *s) int y = 1; int fd = s->fd; + if (s->type == SK_SSH_ACTIVE) + return 0; + if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) ERR("O_NONBLOCK"); @@ -1263,7 +983,7 @@ sk_setup(sock *s) if (sk_is_ipv6(s)) { - if (s->flags & SKF_V6ONLY) + if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP)) if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0) ERR("IPV6_V6ONLY"); @@ -1317,6 +1037,16 @@ sk_tcp_connected(sock *s) s->tx_hook(s); } +#ifdef HAVE_LIBSSH +static void +sk_ssh_connected(sock *s) +{ + sk_alloc_bufs(s); + s->type = SK_SSH; + s->tx_hook(s); +} +#endif + static int sk_passive_connected(sock *s, int type) { @@ -1334,8 +1064,8 @@ sk_passive_connected(sock *s, int type) sock *t = sk_new(s->pool); t->type = type; - t->fd = fd; t->af = s->af; + t->fd = fd; t->ttl = s->ttl; t->tos = s->tos; t->rbsize = s->rbsize; @@ -1369,6 +1099,201 @@ sk_passive_connected(sock *s, int type) return 1; } +#ifdef HAVE_LIBSSH +/* + * Return SSH_OK or SSH_AGAIN or SSH_ERROR + */ +static int +sk_ssh_connect(sock *s) +{ + s->fd = ssh_get_fd(s->ssh->session); + + /* Big fall thru automata */ + switch (s->ssh->state) + { + case SK_SSH_CONNECT: + { + switch (ssh_connect(s->ssh->session)) + { + case SSH_AGAIN: + /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1) + * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere + * documented but our code relies on that. + */ + return SSH_AGAIN; + + case SSH_OK: + break; + + default: + return SSH_ERROR; + } + } + + case SK_SSH_SERVER_KNOWN: + { + s->ssh->state = SK_SSH_SERVER_KNOWN; + + if (s->ssh->server_hostkey_path) + { + int server_identity_is_ok = 1; + + /* Check server identity */ + switch (ssh_is_server_known(s->ssh->session)) + { +#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args); + case SSH_SERVER_KNOWN_OK: + /* The server is known and has not changed. */ + break; + + case SSH_SERVER_NOT_KNOWN: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path); + break; + + case SSH_SERVER_KNOWN_CHANGED: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key."); + server_identity_is_ok = 0; + break; + + case SSH_SERVER_FILE_NOT_FOUND: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path); + server_identity_is_ok = 0; + break; + + case SSH_SERVER_ERROR: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened"); + server_identity_is_ok = 0; + break; + + case SSH_SERVER_FOUND_OTHER: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \ + "It is a possible attack."); + server_identity_is_ok = 0; + break; + } + + if (!server_identity_is_ok) + return SSH_ERROR; + } + } + + case SK_SSH_USERAUTH: + { + s->ssh->state = SK_SSH_USERAUTH; + switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL)) + { + case SSH_AUTH_AGAIN: + return SSH_AGAIN; + + case SSH_AUTH_SUCCESS: + break; + + default: + return SSH_ERROR; + } + } + + case SK_SSH_CHANNEL: + { + s->ssh->state = SK_SSH_CHANNEL; + s->ssh->channel = ssh_channel_new(s->ssh->session); + if (s->ssh->channel == NULL) + return SSH_ERROR; + } + + case SK_SSH_SESSION: + { + s->ssh->state = SK_SSH_SESSION; + switch (ssh_channel_open_session(s->ssh->channel)) + { + case SSH_AGAIN: + return SSH_AGAIN; + + case SSH_OK: + break; + + default: + return SSH_ERROR; + } + } + + case SK_SSH_SUBSYSTEM: + { + s->ssh->state = SK_SSH_SUBSYSTEM; + if (s->ssh->subsystem) + { + switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem)) + { + case SSH_AGAIN: + return SSH_AGAIN; + + case SSH_OK: + break; + + default: + return SSH_ERROR; + } + } + } + + case SK_SSH_ESTABLISHED: + s->ssh->state = SK_SSH_ESTABLISHED; + } + + return SSH_OK; +} + +/* + * Return file descriptor number if success + * Return -1 if failed + */ +static int +sk_open_ssh(sock *s) +{ + if (!s->ssh) + bug("sk_open() sock->ssh is not allocated"); + + ssh_session sess = ssh_new(); + if (sess == NULL) + ERR2("Cannot create a ssh session"); + s->ssh->session = sess; + + const int verbosity = SSH_LOG_NOLOG; + ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity); + ssh_options_set(sess, SSH_OPTIONS_HOST, s->host); + ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport)); + /* TODO: Add SSH_OPTIONS_BINDADDR */ + ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username); + + if (s->ssh->server_hostkey_path) + ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path); + + if (s->ssh->client_privkey_path) + ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path); + + ssh_set_blocking(sess, 0); + + switch (sk_ssh_connect(s)) + { + case SSH_AGAIN: + break; + + case SSH_OK: + sk_ssh_connected(s); + break; + + case SSH_ERROR: + ERR2(ssh_get_error(sess)); + break; + } + + return ssh_get_fd(sess); + + err: + return -1; +} +#endif + /** * sk_open - open a socket * @s: socket @@ -1382,13 +1307,46 @@ sk_passive_connected(sock *s, int type) int sk_open(sock *s) { - int af = BIRD_AF; + int af = AF_UNSPEC; int fd = -1; int do_bind = 0; int bind_port = 0; ip_addr bind_addr = IPA_NONE; sockaddr sa; + if (s->type <= SK_IP) + { + /* + * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either + * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr). + * But the specifications have to be consistent. + */ + + switch (s->subtype) + { + case 0: + ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) || + (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr))); + af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6; + break; + + case SK_IPV4: + ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr)); + ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr)); + af = AF_INET; + break; + + case SK_IPV6: + ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr)); + ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr)); + af = AF_INET6; + break; + + default: + bug("Invalid subtype %d", s->subtype); + } + } + switch (s->type) { case SK_TCP_ACTIVE: @@ -1401,6 +1359,13 @@ sk_open(sock *s) do_bind = bind_port || ipa_nonzero(bind_addr); break; +#ifdef HAVE_LIBSSH + case SK_SSH_ACTIVE: + s->ttx = ""; /* Force s->ttx != s->tpos */ + fd = sk_open_ssh(s); + break; +#endif + case SK_UDP: fd = socket(af, SOCK_DGRAM, IPPROTO_UDP); bind_port = s->sport; @@ -1456,7 +1421,7 @@ sk_open(sock *s) if (sk_set_high_port(s) < 0) log(L_WARN "Socket error: %s%#m", s->err); - sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port); + sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port); if (bind(fd, &sa.sa, SA_LEN(sa)) < 0) ERR2("bind"); } @@ -1468,7 +1433,7 @@ sk_open(sock *s) switch (s->type) { case SK_TCP_ACTIVE: - sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport); + sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport); if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0) sk_tcp_connected(s); else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS && @@ -1481,6 +1446,7 @@ sk_open(sock *s) ERR2("listen"); break; + case SK_SSH_ACTIVE: case SK_MAGIC: break; @@ -1490,6 +1456,7 @@ sk_open(sock *s) if (!(s->flags & SKF_THREAD)) sk_insert(s); + return 0; err: @@ -1672,6 +1639,28 @@ sk_maybe_write(sock *s) reset_tx_buffer(s); return 1; +#ifdef HAVE_LIBSSH + case SK_SSH: + while (s->ttx != s->tpos) + { + e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx); + + if (e < 0) + { + s->err = ssh_get_error(s->ssh->session); + s->err_hook(s, ssh_get_error_code(s->ssh->session)); + + reset_tx_buffer(s); + /* EPIPE is just a connection close notification during TX */ + s->err_hook(s, (errno != EPIPE) ? errno : 0); + return -1; + } + s->ttx += e; + } + reset_tx_buffer(s); + return 1; +#endif + case SK_UDP: case SK_IP: { @@ -1696,6 +1685,7 @@ sk_maybe_write(sock *s) reset_tx_buffer(s); return 1; } + default: bug("sk_maybe_write: unknown socket type %d", s->type); } @@ -1775,6 +1765,64 @@ sk_send_full(sock *s, unsigned len, struct iface *ifa, } */ +static void +call_rx_hook(sock *s, int size) +{ + if (s->rx_hook(s, size)) + { + /* We need to be careful since the socket could have been deleted by the hook */ + if (current_sock == s) + s->rpos = s->rbuf; + } +} + +#ifdef HAVE_LIBSSH +static int +sk_read_ssh(sock *s) +{ + ssh_channel rchans[2] = { s->ssh->channel, NULL }; + struct timeval timev = { 1, 0 }; + + if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR) + return 1; /* Try again */ + + if (ssh_channel_is_eof(s->ssh->channel) != 0) + { + /* The remote side is closing the connection */ + s->err_hook(s, 0); + return 0; + } + + if (rchans[0] == NULL) + return 0; /* No data is available on the socket */ + + const uint used_bytes = s->rpos - s->rbuf; + const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0); + if (read_bytes > 0) + { + /* Received data */ + s->rpos += read_bytes; + call_rx_hook(s, used_bytes + read_bytes); + return 1; + } + else if (read_bytes == 0) + { + if (ssh_channel_is_eof(s->ssh->channel) != 0) + { + /* The remote side is closing the connection */ + s->err_hook(s, 0); + } + } + else + { + s->err = ssh_get_error(s->ssh->session); + s->err_hook(s, ssh_get_error_code(s->ssh->session)); + } + + return 0; /* No data is available on the socket */ +} +#endif + /* sk_read() and sk_write() are called from BFD's event loop */ int @@ -1808,17 +1856,17 @@ sk_read(sock *s, int revents) else { s->rpos += c; - if (s->rx_hook(s, s->rpos - s->rbuf)) - { - /* We need to be careful since the socket could have been deleted by the hook */ - if (current_sock == s) - s->rpos = s->rbuf; - } + call_rx_hook(s, s->rpos - s->rbuf); return 1; } return 0; } +#ifdef HAVE_LIBSSH + case SK_SSH: + return sk_read_ssh(s); +#endif + case SK_MAGIC: return s->rx_hook(s, 0); @@ -1857,6 +1905,27 @@ sk_write(sock *s) return 0; } +#ifdef HAVE_LIBSSH + case SK_SSH_ACTIVE: + { + switch (sk_ssh_connect(s)) + { + case SSH_OK: + sk_ssh_connected(s); + break; + + case SSH_AGAIN: + return 1; + + case SSH_ERROR: + s->err = ssh_get_error(s->ssh->session); + s->err_hook(s, ssh_get_error_code(s->ssh->session)); + break; + } + return 0; + } +#endif + default: if (s->ttx != s->tpos && sk_maybe_write(s) > 0) { @@ -1868,6 +1937,12 @@ sk_write(sock *s) } } +int sk_is_ipv4(sock *s) +{ return s->af == AF_INET; } + +int sk_is_ipv6(sock *s) +{ return s->af == AF_INET6; } + void sk_err(sock *s, int revents) { @@ -1925,9 +2000,6 @@ io_update_time(void) struct timespec ts; int rv; - if (!clock_monotonic_available) - return; - /* * This is third time-tracking procedure (after update_times() above and * times_update() in BFD), dedicated to internal event log and latency @@ -1938,7 +2010,7 @@ io_update_time(void) if (rv < 0) die("clock_gettime: %m"); - last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000); + last_time = ts.tv_sec S + ts.tv_nsec NS; if (event_open) { @@ -2066,15 +2138,13 @@ volatile int async_shutdown_flag; void io_init(void) { - init_list(&near_timers); - init_list(&far_timers); init_list(&sock_list); init_list(&global_event_list); krt_io_init(); - init_times(); - update_times(); - boot_time = now; - srandom((int) now_real); + // XXX init_times(); + // XXX update_times(); + boot_time = current_time(); + srandom((uint) (current_real_time() TO_S)); } static int short_loops = 0; @@ -2083,9 +2153,9 @@ static int short_loops = 0; void io_loop(void) { - int poll_tout; - time_t tout; + int poll_tout, timeout; int nfds, events, pout; + timer *t; sock *s; node *n; int fdmax = 256; @@ -2094,19 +2164,20 @@ io_loop(void) watchdog_start1(); for(;;) { + times_update(&main_timeloop); events = ev_run_list(&global_event_list); - timers: - update_times(); - tout = tm_first_shot(); - if (tout <= now) - { - tm_shot(); - goto timers; - } - poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */ - + timers_fire(&main_timeloop); io_close_event(); + // FIXME + poll_tout = (events ? 0 : 3000); /* Time in milliseconds */ + if (t = timers_first(&main_timeloop)) + { + times_update(&main_timeloop); + timeout = (tm_remains(t) TO_MS) + 1; + poll_tout = MIN(poll_tout, timeout); + } + nfds = 0; WALK_LIST(n, sock_list) { @@ -2177,6 +2248,8 @@ io_loop(void) } if (pout) { + times_update(&main_timeloop); + /* guaranteed to be non-empty */ current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y index 6fe39fa9..9aac8668 100644 --- a/sysdep/unix/krt.Y +++ b/sysdep/unix/krt.Y @@ -8,16 +8,29 @@ CF_HDR -#include "lib/krt.h" +#include "sysdep/unix/krt.h" CF_DEFINES #define THIS_KRT ((struct krt_config *) this_proto) #define THIS_KIF ((struct kif_config *) this_proto) +#define KIF_IFACE ((struct kif_iface_config *) this_ipatt) + +static void +kif_set_preferred(ip_addr ip) +{ + if (ipa_is_ip4(ip)) + KIF_IFACE->pref_v4 = ip; + else if (!ipa_is_link_local(ip)) + KIF_IFACE->pref_v6 = ip; + else + KIF_IFACE->pref_ll = ip; +} CF_DECLS CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS) +CF_KEYWORDS(INTERFACE, PREFERRED) %type <i> kern_mp_limit @@ -27,11 +40,12 @@ CF_GRAMMAR CF_ADDTO(proto, kern_proto '}') -kern_proto_start: proto_start KERNEL { this_proto = krt_init_config($1); } +kern_proto_start: proto_start KERNEL { + this_proto = krt_init_config($1); +} ; CF_ADDTO(kern_proto, kern_proto_start proto_name '{') -CF_ADDTO(kern_proto, kern_proto proto_item ';') CF_ADDTO(kern_proto, kern_proto kern_item ';') kern_mp_limit: @@ -40,10 +54,12 @@ kern_mp_limit: ; kern_item: - PERSIST bool { THIS_KRT->persist = $2; } + proto_item + | proto_channel { this_proto->net_type = $1->net_type; } + | PERSIST bool { THIS_KRT->persist = $2; } | SCAN TIME expr { /* Scan time of 0 means scan on startup only */ - THIS_KRT->scan_time = $3; + THIS_KRT->scan_time = $3 S_; } | LEARN bool { THIS_KRT->learn = $2; @@ -52,7 +68,6 @@ kern_item: cf_error("Learning of kernel routes not supported on this platform"); #endif } - | DEVICE ROUTES bool { THIS_KRT->devroutes = $3; } | GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; } | MERGE PATHS bool kern_mp_limit { THIS_KRT->merge_paths = $3 ? $4 : 0; @@ -71,23 +86,42 @@ kif_proto_start: proto_start DEVICE { this_proto = kif_init_config($1); } ; CF_ADDTO(kif_proto, kif_proto_start proto_name '{') -CF_ADDTO(kif_proto, kif_proto proto_item ';') CF_ADDTO(kif_proto, kif_proto kif_item ';') kif_item: - SCAN TIME expr { + proto_item + | INTERFACE kif_iface + | SCAN TIME expr { /* Scan time of 0 means scan on startup only */ - THIS_KIF->scan_time = $3; - } - | PRIMARY text_or_none prefix_or_ipa { - struct kif_primary_item *kpi = cfg_alloc(sizeof (struct kif_primary_item)); - kpi->pattern = $2; - kpi->prefix = $3.addr; - kpi->pxlen = $3.len; - add_tail(&THIS_KIF->primary, &kpi->n); + THIS_KIF->scan_time = $3 S_; } ; +kif_iface_start: +{ + this_ipatt = cfg_allocz(sizeof(struct kif_iface_config)); + add_tail(&THIS_KIF->iface_list, NODE this_ipatt); + init_list(&this_ipatt->ipn_list); +} + +kif_iface_item: + PREFERRED ipa { kif_set_preferred($2); } + ; + +kif_iface_opts: + /* empty */ + | kif_iface_opts kif_iface_item ';' + ; + +kif_iface_opt_list: + /* empty */ + | '{' kif_iface_opts '}' + ; + +kif_iface: + kif_iface_start iface_patt_list_nopx kif_iface_opt_list; + + CF_ADDTO(dynamic_attr, KRT_SOURCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_KRT_SOURCE); }) CF_ADDTO(dynamic_attr, KRT_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_KRT_METRIC); }) diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 526c0cab..e7bd79e3 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -56,9 +56,9 @@ #include "nest/route.h" #include "nest/protocol.h" #include "filter/filter.h" -#include "lib/timer.h" #include "conf/conf.h" #include "lib/string.h" +#include "lib/timer.h" #include "unix.h" #include "krt.h" @@ -75,7 +75,7 @@ void krt_io_init(void) { krt_pool = rp_new(&root_pool, "Kernel Syncer"); - krt_filter_lp = lp_new(krt_pool, 4080); + krt_filter_lp = lp_new_default(krt_pool); init_list(&krt_proto_list); krt_sys_io_init(); } @@ -87,7 +87,17 @@ krt_io_init(void) struct kif_proto *kif_proto; static struct kif_config *kif_cf; static timer *kif_scan_timer; -static bird_clock_t kif_last_shot; +static btime kif_last_shot; + +static struct kif_iface_config kif_default_iface = {}; + +struct kif_iface_config * +kif_get_iface_config(struct iface *iface) +{ + struct kif_config *cf = (void *) (kif_proto->p.cf); + struct kif_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL); + return ic ?: &kif_default_iface; +} static void kif_scan(timer *t) @@ -95,14 +105,14 @@ kif_scan(timer *t) struct kif_proto *p = t->data; KRT_TRACE(p, D_EVENTS, "Scanning interfaces"); - kif_last_shot = now; + kif_last_shot = current_time(); kif_do_scan(p); } static void kif_force_scan(void) { - if (kif_proto && kif_last_shot + 2 < now) + if (kif_proto && ((kif_last_shot + 2 S) < current_time())) { kif_scan(kif_scan_timer); tm_start(kif_scan_timer, ((struct kif_config *) kif_proto->p.cf)->scan_time); @@ -112,65 +122,14 @@ kif_force_scan(void) void kif_request_scan(void) { - if (kif_proto && kif_scan_timer->expires > now) - tm_start(kif_scan_timer, 1); -} - -static inline int -prefer_addr(struct ifa *a, struct ifa *b) -{ - int sa = a->scope > SCOPE_LINK; - int sb = b->scope > SCOPE_LINK; - - if (sa < sb) - return 0; - else if (sa > sb) - return 1; - else - return ipa_compare(a->ip, b->ip) < 0; -} - -static inline struct ifa * -find_preferred_ifa(struct iface *i, ip_addr prefix, ip_addr mask) -{ - struct ifa *a, *b = NULL; - - WALK_LIST(a, i->addrs) - { - if (!(a->flags & IA_SECONDARY) && - ipa_equal(ipa_and(a->ip, mask), prefix) && - (!b || prefer_addr(a, b))) - b = a; - } - - return b; -} - -struct ifa * -kif_choose_primary(struct iface *i) -{ - struct kif_config *cf = (struct kif_config *) (kif_proto->p.cf); - struct kif_primary_item *it; - struct ifa *a; - - WALK_LIST(it, cf->primary) - { - if (!it->pattern || patmatch(it->pattern, i->name)) - if (a = find_preferred_ifa(i, it->prefix, ipa_mkmask(it->pxlen))) - return a; - } - - if (a = kif_get_primary_ip(i)) - return a; - - return find_preferred_ifa(i, IPA_NONE, IPA_NONE); + if (kif_proto && (kif_scan_timer->expires > (current_time() + 1 S))) + tm_start(kif_scan_timer, 1 S); } - static struct proto * kif_init(struct proto_config *c) { - struct kif_proto *p = proto_new(c, sizeof(struct kif_proto)); + struct kif_proto *p = proto_new(c); kif_sys_init(p); return &p->p; @@ -185,10 +144,7 @@ kif_start(struct proto *P) kif_sys_start(p); /* Start periodic interface scanning */ - kif_scan_timer = tm_new(P->pool); - kif_scan_timer->hook = kif_scan; - kif_scan_timer->data = p; - kif_scan_timer->recurrent = KIF_CF->scan_time; + kif_scan_timer = tm_new_init(P->pool, kif_scan, p, KIF_CF->scan_time, 0); kif_scan(kif_scan_timer); tm_start(kif_scan_timer, KIF_CF->scan_time); @@ -224,15 +180,15 @@ kif_reconfigure(struct proto *p, struct proto_config *new) tm_start(kif_scan_timer, n->scan_time); } - if (!EMPTY_LIST(o->primary) || !EMPTY_LIST(n->primary)) + if (!EMPTY_LIST(o->iface_list) || !EMPTY_LIST(n->iface_list)) { /* This is hack, we have to update a configuration * to the new value just now, because it is used - * for recalculation of primary addresses. + * for recalculation of preferred addresses. */ p->cf = new; - ifa_recalc_all_primary_addresses(); + if_recalc_all_preferred_addresses(); } return 1; @@ -253,8 +209,8 @@ kif_init_config(int class) cf_error("Kernel device protocol already defined"); kif_cf = (struct kif_config *) proto_config_new(&proto_unix_iface, class); - kif_cf->scan_time = 60; - init_list(&kif_cf->primary); + kif_cf->scan_time = 60 S; + init_list(&kif_cf->iface_list); kif_sys_init_config(kif_cf); return (struct proto_config *) kif_cf; @@ -266,21 +222,17 @@ kif_copy_config(struct proto_config *dest, struct proto_config *src) struct kif_config *d = (struct kif_config *) dest; struct kif_config *s = (struct kif_config *) src; - /* Shallow copy of everything (just scan_time currently) */ - proto_copy_rest(dest, src, sizeof(struct kif_config)); - - /* Copy primary addr list */ - cfg_copy_list(&d->primary, &s->primary, sizeof(struct kif_primary_item)); + /* Copy interface config list */ + cfg_copy_list(&d->iface_list, &s->iface_list, sizeof(struct kif_iface_config)); /* Fix sysdep parts */ kif_sys_copy_config(d, s); } - struct protocol proto_unix_iface = { .name = "Device", .template = "device%d", - .preference = DEF_PREF_DIRECT, + .proto_size = sizeof(struct kif_proto), .config_size = sizeof(struct kif_config), .preconfig = kif_preconfig, .init = kif_init, @@ -298,14 +250,14 @@ static inline void krt_trace_in(struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) - log(L_TRACE "%s: %I/%d: %s", p->p.name, e->net->n.prefix, e->net->n.pxlen, msg); + log(L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg); } static inline void krt_trace_in_rl(struct tbf *f, struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) - log_rl(f, L_TRACE "%s: %I/%d: %s", p->p.name, e->net->n.prefix, e->net->n.pxlen, msg); + log_rl(f, L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg); } /* @@ -348,19 +300,15 @@ krt_learn_announce_update(struct krt_proto *p, rte *e) net *n = e->net; rta *aa = rta_clone(e->attrs); rte *ee = rte_get_temp(aa); - net *nn = net_get(p->p.table, n->n.prefix, n->n.pxlen); - ee->net = nn; ee->pflags = 0; - ee->pref = p->p.preference; ee->u.krt = e->u.krt; - rte_update(&p->p, nn, ee); + rte_update(&p->p, n->n.addr, ee); } static void krt_learn_announce_delete(struct krt_proto *p, net *n) { - n = net_find(p->p.table, n->n.prefix, n->n.pxlen); - rte_update(&p->p, n, NULL); + rte_update(&p->p, n->n.addr, NULL); } /* Called when alien route is discovered during scan */ @@ -368,7 +316,7 @@ static void krt_learn_scan(struct krt_proto *p, rte *e) { net *n0 = e->net; - net *n = net_get(&p->krt_table, n0->n.prefix, n0->n.pxlen); + net *n = net_get(&p->krt_table, n0->n.addr); rte *m, **mm; e->attrs = rta_lookup(e->attrs); @@ -412,9 +360,8 @@ krt_learn_prune(struct krt_proto *p) FIB_ITERATE_INIT(&fit, fib); again: - FIB_ITERATE_START(fib, &fit, f) + FIB_ITERATE_START(fib, &fit, net, n) { - net *n = (net *) f; rte *e, **ee, *best, **pbest, *old_best; /* @@ -455,8 +402,8 @@ again: if (old_best) krt_learn_announce_delete(p, n); - FIB_ITERATE_PUT(&fit, f); - fib_delete(fib, f); + FIB_ITERATE_PUT(&fit); + fib_delete(fib, n); goto again; } @@ -473,7 +420,7 @@ again: else DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric); } - FIB_ITERATE_END(f); + FIB_ITERATE_END; p->reload = 0; } @@ -482,7 +429,7 @@ static void krt_learn_async(struct krt_proto *p, rte *e, int new) { net *n0 = e->net; - net *n = net_get(&p->krt_table, n0->n.prefix, n0->n.pxlen); + net *n = net_get(&p->krt_table, n0->n.addr); rte *g, **gg, *best, **bestp, *old_best; e->attrs = rta_lookup(e->attrs); @@ -588,12 +535,11 @@ krt_dump_attrs(rte *e) static void krt_flush_routes(struct krt_proto *p) { - struct rtable *t = p->p.table; + struct rtable *t = p->p.main_channel->table; KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); - FIB_WALK(&t->fib, f) + FIB_WALK(&t->fib, net, n) { - net *n = (net *) f; rte *e = n->routes; if (rte_is_valid(e) && (n->n.flags & KRF_INSTALLED)) { @@ -608,12 +554,12 @@ krt_flush_routes(struct krt_proto *p) static struct rte * krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa) { - struct announce_hook *ah = p->p.main_ahook; - struct filter *filter = ah->out_filter; + struct channel *c = p->p.main_channel; + struct filter *filter = c->out_filter; rte *rt; - if (p->p.accept_ra_types == RA_MERGED) - return rt_export_merged(ah, net, rt_free, tmpa, krt_filter_lp, 1); + if (c->ra_mode == RA_MERGED) + return rt_export_merged(c, net, rt_free, tmpa, krt_filter_lp, 1); rt = net->routes; *rt_free = NULL; @@ -654,17 +600,11 @@ krt_same_dest(rte *k, rte *e) if (ka->dest != ea->dest) return 0; - switch (ka->dest) - { - case RTD_ROUTER: - return ipa_equal(ka->gw, ea->gw); - case RTD_DEVICE: - return !strcmp(ka->iface->name, ea->iface->name); - case RTD_MULTIPATH: - return mpnh_same(ka->nexthops, ea->nexthops); - default: - return 1; - } + + if (ka->dest == RTD_UNICAST) + return nexthop_same(&(ka->nh), &(ea->nh)); + + return 1; } /* @@ -760,13 +700,12 @@ krt_got_route(struct krt_proto *p, rte *e) static void krt_prune(struct krt_proto *p) { - struct rtable *t = p->p.table; + struct rtable *t = p->p.main_channel->table; KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name); - FIB_WALK(&t->fib, f) + FIB_WALK(&t->fib, net, n) { - net *n = (net *) f; - int verdict = f->flags & KRF_VERDICT_MASK; + int verdict = n->n.flags & KRF_VERDICT_MASK; rte *new, *old, *rt_free = NULL; ea_list *tmpa = NULL; @@ -795,7 +734,7 @@ krt_prune(struct krt_proto *p) switch (verdict) { case KRF_CREATE: - if (new && (f->flags & KRF_INSTALLED)) + if (new && (n->n.flags & KRF_INSTALLED)) { krt_trace_in(p, new, "reinstalling"); krt_replace_rte(p, n, new, NULL, tmpa); @@ -822,7 +761,7 @@ krt_prune(struct krt_proto *p) if (rt_free) rte_free(rt_free); lp_flush(krt_filter_lp); - f->flags &= ~KRF_VERDICT_MASK; + n->n.flags &= ~KRF_VERDICT_MASK; } FIB_WALK_END; @@ -901,11 +840,11 @@ static void krt_scan_timer_start(struct krt_proto *p) { if (!krt_scan_count) - krt_scan_timer = tm_new_set(krt_pool, krt_scan, NULL, 0, KRT_CF->scan_time); + krt_scan_timer = tm_new_init(krt_pool, krt_scan, NULL, KRT_CF->scan_time, 0); krt_scan_count++; - tm_start(krt_scan_timer, 1); + tm_start(krt_scan_timer, 1 S); } static void @@ -943,8 +882,8 @@ krt_scan(timer *t) static void krt_scan_timer_start(struct krt_proto *p) { - p->scan_timer = tm_new_set(p->p.pool, krt_scan, p, 0, KRT_CF->scan_time); - tm_start(p->scan_timer, 1); + p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0); + tm_start(p->scan_timer, 1 S); } static void @@ -1000,7 +939,7 @@ krt_store_tmp_attrs(rte *rt, struct ea_list *attrs) static int krt_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED) { - struct krt_proto *p = (struct krt_proto *) P; + // struct krt_proto *p = (struct krt_proto *) P; rte *e = *new; if (e->attrs->src->proto == P) @@ -1021,11 +960,6 @@ krt_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li return -1; } - if (!KRT_CF->devroutes && - (e->attrs->dest == RTD_DEVICE) && - (e->attrs->source != RTS_STATIC_DEVICE)) - return -1; - if (!krt_capable(e)) return -1; @@ -1033,7 +967,7 @@ krt_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li } static void -krt_rt_notify(struct proto *P, struct rtable *table UNUSED, net *net, +krt_rt_notify(struct proto *P, struct channel *ch UNUSED, net *net, rte *new, rte *old, struct ea_list *eattrs) { struct krt_proto *p = (struct krt_proto *) P; @@ -1067,10 +1001,10 @@ krt_if_notify(struct proto *P, uint flags, struct iface *iface UNUSED) krt_scan_timer_kick(p); } -static int -krt_reload_routes(struct proto *P) +static void +krt_reload_routes(struct channel *C) { - struct krt_proto *p = (struct krt_proto *) P; + struct krt_proto *p = (void *) C->proto; /* Although we keep learned routes in krt_table, we rather schedule a scan */ @@ -1079,14 +1013,12 @@ krt_reload_routes(struct proto *P) p->reload = 1; krt_scan_timer_kick(p); } - - return 1; } static void -krt_feed_end(struct proto *P) +krt_feed_end(struct channel *C) { - struct krt_proto *p = (struct krt_proto *) P; + struct krt_proto *p = (void *) C->proto; p->ready = 1; krt_scan_timer_kick(p); @@ -1107,14 +1039,49 @@ krt_rte_same(rte *a, rte *b) struct krt_config *krt_cf; +static void +krt_preconfig(struct protocol *P UNUSED, struct config *c) +{ + krt_cf = NULL; + krt_sys_preconfig(c); +} + +static void +krt_postconfig(struct proto_config *CF) +{ + struct krt_config *cf = (void *) CF; + + if (EMPTY_LIST(CF->channels)) + cf_error("Channel not specified"); + +#ifdef CONFIG_ALL_TABLES_AT_ONCE + if (krt_cf->scan_time != cf->scan_time) + cf_error("All kernel syncers must use the same table scan interval"); +#endif + + struct channel_config *cc = proto_cf_main_channel(CF); + struct rtable_config *tab = cc->table; + if (tab->krt_attached) + cf_error("Kernel syncer (%s) already attached to table %s", tab->krt_attached->name, tab->name); + tab->krt_attached = CF; + + if (cf->merge_paths) + { + cc->ra_mode = RA_MERGED; + cc->merge_limit = cf->merge_paths; + } + + krt_sys_postconfig(cf); +} + static struct proto * -krt_init(struct proto_config *C) +krt_init(struct proto_config *CF) { - struct krt_proto *p = proto_new(C, sizeof(struct krt_proto)); - struct krt_config *c = (struct krt_config *) C; + struct krt_proto *p = proto_new(CF); + // struct krt_config *cf = (void *) CF; + + p->p.main_channel = proto_add_channel(&p->p, proto_cf_main_channel(CF)); - p->p.accept_ra_types = c->merge_paths ? RA_MERGED : RA_OPTIMAL; - p->p.merge_limit = c->merge_paths; p->p.import_control = krt_import_control; p->p.rt_notify = krt_rt_notify; p->p.if_notify = krt_if_notify; @@ -1133,6 +1100,16 @@ krt_start(struct proto *P) { struct krt_proto *p = (struct krt_proto *) P; + switch (p->p.net_type) + { + case NET_IP4: p->af = AF_INET; break; + case NET_IP6: p->af = AF_INET6; break; +#ifdef AF_MPLS + case NET_MPLS: p->af = AF_MPLS; break; +#endif + default: log(L_ERR "KRT: Tried to start with strange net type: %d", p->p.net_type); return PS_START; break; + } + add_tail(&krt_proto_list, &p->krt_node); #ifdef KRT_ALLOW_LEARN @@ -1147,8 +1124,8 @@ krt_start(struct proto *P) krt_scan_timer_start(p); - if (P->gr_recovery && KRT_CF->graceful_restart) - P->gr_wait = 1; + if (p->p.gr_recovery && KRT_CF->graceful_restart) + p->p.main_channel->gr_wait = 1; return PS_UP; } @@ -1177,40 +1154,19 @@ krt_shutdown(struct proto *P) } static int -krt_reconfigure(struct proto *p, struct proto_config *new) +krt_reconfigure(struct proto *p, struct proto_config *CF) { - struct krt_config *o = (struct krt_config *) p->cf; - struct krt_config *n = (struct krt_config *) new; + struct krt_config *o = (void *) p->cf; + struct krt_config *n = (void *) CF; + + if (!proto_configure_channel(p, &p->main_channel, proto_cf_main_channel(CF))) + return 0; if (!krt_sys_reconfigure((struct krt_proto *) p, n, o)) return 0; /* persist, graceful restart need not be the same */ - return o->scan_time == n->scan_time && o->learn == n->learn && - o->devroutes == n->devroutes && o->merge_paths == n->merge_paths; -} - -static void -krt_preconfig(struct protocol *P UNUSED, struct config *c) -{ - krt_cf = NULL; - krt_sys_preconfig(c); -} - -static void -krt_postconfig(struct proto_config *C) -{ - struct krt_config *c = (struct krt_config *) C; - -#ifdef CONFIG_ALL_TABLES_AT_ONCE - if (krt_cf->scan_time != c->scan_time) - cf_error("All kernel syncers must use the same table scan interval"); -#endif - - if (C->table->krt_attached) - cf_error("Kernel syncer (%s) already attached to table %s", C->table->krt_attached->name, C->table->name); - C->table->krt_attached = C; - krt_sys_postconfig(c); + return o->scan_time == n->scan_time && o->learn == n->learn; } struct proto_config * @@ -1222,7 +1178,7 @@ krt_init_config(int class) #endif krt_cf = (struct krt_config *) proto_config_new(&proto_unix_kernel, class); - krt_cf->scan_time = 60; + krt_cf->scan_time = 60 S; krt_sys_init_config(krt_cf); return (struct proto_config *) krt_cf; @@ -1234,9 +1190,6 @@ krt_copy_config(struct proto_config *dest, struct proto_config *src) struct krt_config *d = (struct krt_config *) dest; struct krt_config *s = (struct krt_config *) src; - /* Shallow copy of everything */ - proto_copy_rest(dest, src, sizeof(struct krt_config)); - /* Fix sysdep parts */ krt_sys_copy_config(d, s); } @@ -1265,6 +1218,12 @@ struct protocol proto_unix_kernel = { .template = "kernel%d", .attr_class = EAP_KRT, .preference = DEF_PREF_INHERITED, +#ifdef HAVE_MPLS_KERNEL + .channel_mask = NB_IP | NB_MPLS, +#else + .channel_mask = NB_IP, +#endif + .proto_size = sizeof(struct krt_proto), .config_size = sizeof(struct krt_config), .preconfig = krt_preconfig, .postconfig = krt_postconfig, diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index d4a8717e..b627882d 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -15,7 +15,9 @@ struct krt_proto; struct kif_config; struct kif_proto; -#include "lib/krt-sys.h" +#include "nest/iface.h" +#include "sysdep/config.h" +#include CONFIG_INCLUDE_KRTSYS_H /* Flags stored in net->n.flags, rest are in nest/route.h */ @@ -44,10 +46,9 @@ extern struct protocol proto_unix_kernel; struct krt_config { struct proto_config c; struct krt_params sys; /* Sysdep params */ + btime scan_time; /* How often we re-scan routes */ int persist; /* Keep routes when we exit */ - int scan_time; /* How often we re-scan routes */ int learn; /* Learn routes from other sources */ - int devroutes; /* Allow export of device routes */ int graceful_restart; /* Regard graceful restart recovery */ int merge_paths; /* Exported routes are merged for ECMP */ }; @@ -65,6 +66,7 @@ struct krt_proto { #endif node krt_node; /* Node in krt_proto_list */ + byte af; /* Kernel address family (AF_*) */ byte ready; /* Initial feed has been finished */ byte initialized; /* First scan has been finished */ byte reload; /* Next scan is doing reload */ @@ -93,18 +95,20 @@ void krt_got_route_async(struct krt_proto *p, struct rte *e, int new); extern struct protocol proto_unix_iface; -struct kif_primary_item { - node n; - byte *pattern; - ip_addr prefix; - int pxlen; -}; - struct kif_config { struct proto_config c; struct kif_params sys; /* Sysdep params */ - int scan_time; /* How often we re-scan interfaces */ - list primary; /* Preferences for primary addresses (struct kif_primary_item) */ + + list iface_list; /* List of iface configs (struct kif_iface_config) */ + btime scan_time; /* How often we re-scan interfaces */ +}; + +struct kif_iface_config { + struct iface_patt i; + + ip_addr pref_v4; + ip_addr pref_v6; + ip_addr pref_ll; }; struct kif_proto { @@ -112,10 +116,11 @@ struct kif_proto { struct kif_state sys; /* Sysdep state */ }; -struct kif_proto *kif_proto; +extern struct kif_proto *kif_proto; #define KIF_CF ((struct kif_config *)p->p.cf) +struct kif_iface_config * kif_get_iface_config(struct iface *iface); struct proto_config * krt_init_config(int class); @@ -150,6 +155,6 @@ void kif_sys_copy_config(struct kif_config *, struct kif_config *); void kif_do_scan(struct kif_proto *); -struct ifa *kif_get_primary_ip(struct iface *i); +int kif_update_sysdep_addr(struct iface *i); #endif diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c index 88a7188c..f9dccc39 100644 --- a/sysdep/unix/log.c +++ b/sysdep/unix/log.c @@ -27,7 +27,7 @@ #include "nest/mrtdump.h" #include "lib/string.h" #include "lib/lists.h" -#include "lib/unix.h" +#include "sysdep/unix/unix.h" static FILE *dbgf; static list *current_log_list; @@ -120,7 +120,7 @@ log_commit(int class, buffer *buf) else { byte tbuf[TM_DATETIME_BUFFER_SIZE]; - tm_format_datetime(tbuf, &config->tf_log, now); + tm_format_real_time(tbuf, config->tf_log.fmt1, current_real_time()); fprintf(l->fh, "%s <%s> ", tbuf, class_names[class]); } fputs(buf->start, l->fh); @@ -180,19 +180,18 @@ log_msg(const char *msg, ...) void log_rl(struct tbf *f, const char *msg, ...) { - int last_hit = f->mark; int class = 1; va_list args; /* Rate limiting is a bit tricky here as it also logs '...' during the first hit */ - if (tbf_limit(f) && last_hit) + if (tbf_limit(f) && (f->drop > 1)) return; if (*msg >= 1 && *msg <= 8) class = *msg++; va_start(args, msg); - vlog(class, (f->mark ? "..." : msg), args); + vlog(class, (f->drop ? "..." : msg), args); va_end(args); } @@ -332,7 +331,7 @@ void mrt_dump_message(struct proto *p, u16 type, u16 subtype, byte *buf, u32 len) { /* Prepare header */ - put_u32(buf+0, now_real); + put_u32(buf+0, current_real_time() TO_S); put_u16(buf+4, type); put_u16(buf+6, subtype); put_u32(buf+8, len - MRTDUMP_HDR_LENGTH); diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 8aa19fce..2251d3fb 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -27,6 +27,7 @@ #include "lib/resource.h" #include "lib/socket.h" #include "lib/event.h" +#include "lib/timer.h" #include "lib/string.h" #include "nest/route.h" #include "nest/protocol.h" @@ -56,7 +57,7 @@ async_dump(void) rdump(&root_pool); sk_dump_all(); - tm_dump_all(); + // XXXX tm_dump_all(); if_dump_all(); neigh_dump_all(); rta_dump_all(); @@ -71,7 +72,7 @@ async_dump(void) */ #ifdef CONFIG_RESTRICTED_PRIVILEGES -#include "lib/syspriv.h" +#include CONFIG_INCLUDE_SYSPRIV_H #else static inline void @@ -302,7 +303,7 @@ cmd_reconfig_undo_notify(void) } void -cmd_reconfig(char *name, int type, int timeout) +cmd_reconfig(char *name, int type, uint timeout) { if (cli_access_restricted()) return; @@ -571,6 +572,10 @@ sysdep_shutdown_done(void) * Signals */ +volatile int async_config_flag; +volatile int async_dump_flag; +volatile int async_shutdown_flag; + static void handle_sighup(int sig UNUSED) { @@ -815,12 +820,14 @@ main(int argc, char **argv) log_init_debug(""); log_switch(debug_flag, NULL, NULL); + net_init(); resource_init(); + timer_init(); olock_init(); io_init(); rt_init(); if_init(); - roa_init(); +// roa_init(); config_init(); uid_t use_uid = get_uid(use_user); diff --git a/sysdep/unix/timer.h b/sysdep/unix/timer.h deleted file mode 100644 index aa3ed143..00000000 --- a/sysdep/unix/timer.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * BIRD -- Unix Timers - * - * (c) 1998 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#ifndef _BIRD_TIMER_H_ -#define _BIRD_TIMER_H_ - -#include <time.h> - -#include "lib/resource.h" - -typedef time_t bird_clock_t; /* Use instead of time_t */ - -typedef struct timer { - resource r; - void (*hook)(struct timer *); - void *data; - uint randomize; /* Amount of randomization */ - uint recurrent; /* Timer recurrence */ - node n; /* Internal link */ - bird_clock_t expires; /* 0=inactive */ -} timer; - -timer *tm_new(pool *); -void tm_start(timer *, uint after); -void tm_stop(timer *); -void tm_dump_all(void); - -extern bird_clock_t now; /* Relative, monotonic time in seconds */ -extern bird_clock_t now_real; /* Time in seconds since fixed known epoch */ -extern bird_clock_t boot_time; - -static inline int -tm_active(timer *t) -{ - return t->expires != 0; -} - -static inline bird_clock_t -tm_remains(timer *t) -{ - return t->expires ? t->expires - now : 0; -} - -static inline void -tm_start_max(timer *t, bird_clock_t after) -{ - bird_clock_t rem = tm_remains(t); - tm_start(t, (rem > after) ? rem : after); -} - -static inline timer * -tm_new_set(pool *p, void (*hook)(struct timer *), void *data, uint rand, uint rec) -{ - timer *t = tm_new(p); - t->hook = hook; - t->data = data; - t->randomize = rand; - t->recurrent = rec; - return t; -} - - -struct timeformat { - char *fmt1, *fmt2; - bird_clock_t limit; -}; - -bird_clock_t tm_parse_date(char *); /* Convert date to bird_clock_t */ -bird_clock_t tm_parse_datetime(char *); /* Convert date to bird_clock_t */ - -#define TM_DATETIME_BUFFER_SIZE 32 /* Buffer size required by tm_format_datetime */ -void -tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t); - -#define TIME_T_IS_64BIT (sizeof(time_t) == 8) -#define TIME_T_IS_SIGNED ((time_t) -1 < 0) - -#define TIME_INFINITY \ - ((time_t) (TIME_T_IS_SIGNED ? \ - (TIME_T_IS_64BIT ? 0x7fffffffffffffff : 0x7fffffff): \ - (TIME_T_IS_64BIT ? 0xffffffffffffffff : 0xffffffff))) - -#endif diff --git a/sysdep/unix/unix.h b/sysdep/unix/unix.h index 3ef2e3ef..cb12fad8 100644 --- a/sysdep/unix/unix.h +++ b/sysdep/unix/unix.h @@ -22,7 +22,7 @@ void async_config(void); void async_dump(void); void async_shutdown(void); void cmd_check_config(char *name); -void cmd_reconfig(char *name, int type, int timeout); +void cmd_reconfig(char *name, int type, uint timeout); void cmd_reconfig_confirm(void); void cmd_reconfig_undo(void); void cmd_shutdown(void); @@ -47,14 +47,6 @@ typedef struct sockaddr_bird { } sockaddr; -#ifdef IPV6 -#define BIRD_AF AF_INET6 -#define ipa_from_sa(x) ipa_from_sa6(x) -#else -#define BIRD_AF AF_INET -#define ipa_from_sa(x) ipa_from_sa4(x) -#endif - /* This is sloppy hack, it should be detected by configure script */ /* Linux systems have it defined so this is definition for BSD systems */ @@ -63,29 +55,36 @@ typedef struct sockaddr_bird { #endif -static inline ip_addr ipa_from_in4(struct in_addr a UNUSED6) +static inline ip_addr ipa_from_in4(struct in_addr a) { return ipa_from_u32(ntohl(a.s_addr)); } -static inline ip_addr ipa_from_in6(struct in6_addr a UNUSED4) +static inline ip_addr ipa_from_in6(struct in6_addr a) { return ipa_build6(ntohl(a.s6_addr32[0]), ntohl(a.s6_addr32[1]), ntohl(a.s6_addr32[2]), ntohl(a.s6_addr32[3])); } -static inline ip_addr ipa_from_sa4(sockaddr *sa UNUSED6) +static inline ip_addr ipa_from_sa4(sockaddr *sa) { return ipa_from_in4(((struct sockaddr_in *) sa)->sin_addr); } -static inline ip_addr ipa_from_sa6(sockaddr *sa UNUSED4) +static inline ip_addr ipa_from_sa6(sockaddr *sa) { return ipa_from_in6(((struct sockaddr_in6 *) sa)->sin6_addr); } +static inline ip_addr ipa_from_sa(sockaddr *sa) +{ + switch (sa->sa.sa_family) + { + case AF_INET: return ipa_from_sa4(sa); + case AF_INET6: return ipa_from_sa6(sa); + default: return IPA_NONE; + } +} + static inline struct in_addr ipa_to_in4(ip_addr a) { return (struct in_addr) { htonl(ipa_to_u32(a)) }; } -#ifdef IPV6 +static inline struct in_addr ip4_to_in4(ip4_addr a) +{ return (struct in_addr) { htonl(ip4_to_u32(a)) }; } + static inline struct in6_addr ipa_to_in6(ip_addr a) { return (struct in6_addr) { .s6_addr32 = { htonl(_I0(a)), htonl(_I1(a)), htonl(_I2(a)), htonl(_I3(a)) } }; } -#else -/* Temporary dummy */ -static inline struct in6_addr ipa_to_in6(ip_addr a UNUSED) -{ return (struct in6_addr) { .s6_addr32 = { 0, 0, 0, 0 } }; } -#endif void sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port); int sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port); @@ -95,9 +94,9 @@ int sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *po #define SUN_LEN(ptr) ((size_t) (((struct sockaddr_un *) 0)->sun_path) + strlen ((ptr)->sun_path)) #endif -volatile int async_config_flag; -volatile int async_dump_flag; -volatile int async_shutdown_flag; +extern volatile int async_config_flag; +extern volatile int async_dump_flag; +extern volatile int async_shutdown_flag; void io_init(void); void io_loop(void); @@ -106,7 +105,6 @@ int sk_open_unix(struct birdsock *s, char *name); void *tracked_fopen(struct pool *, char *name, char *mode); void test_old_bird(char *path); - /* krt.c bits */ void krt_io_init(void); |