diff options
author | Maria Matejka <mq@ucw.cz> | 2022-05-30 15:36:54 +0200 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2022-05-30 15:36:54 +0200 |
commit | 45af19dd99b3c8f954621e09075e8be180e2f731 (patch) | |
tree | 2e25c82a8e13080d589c79ec0478e5127a9404fb | |
parent | ef3a8138c909baf58c61ddf2edd815eab899d3a6 (diff) | |
parent | 652be92a21f5575e5f74f6abe98eb4200b86776c (diff) |
Merge commit '652be92a21f5575e5f74f6abe98eb4200b86776c' into haugesund
-rw-r--r-- | .gitlab-ci.yml | 22 | ||||
-rw-r--r-- | client/client.c | 5 | ||||
-rw-r--r-- | distro/pkg/rpm/bird.service | 3 | ||||
-rw-r--r-- | doc/bird.sgml | 9 | ||||
-rw-r--r-- | filter/f-inst.c | 1 | ||||
-rw-r--r-- | lib/ip.c | 24 | ||||
-rw-r--r-- | nest/rt-table.c | 2 | ||||
-rw-r--r-- | proto/babel/babel.c | 13 | ||||
-rw-r--r-- | proto/babel/packets.c | 2 | ||||
-rw-r--r-- | proto/bfd/bfd.c | 31 | ||||
-rw-r--r-- | proto/bfd/bfd.h | 3 | ||||
-rw-r--r-- | proto/bfd/config.Y | 4 | ||||
-rw-r--r-- | proto/bfd/packets.c | 36 | ||||
-rw-r--r-- | proto/bgp/attrs.c | 7 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 3 | ||||
-rw-r--r-- | proto/rpki/rpki.c | 15 | ||||
-rw-r--r-- | proto/rpki/transport.c | 1 | ||||
-rw-r--r-- | sysdep/linux/netlink.c.orig | 2179 |
18 files changed, 135 insertions, 2225 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 678a08f6..7809fecd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -360,17 +360,17 @@ build-opensuse-15.3-amd64: <<: *build-linux image: registry.nic.cz/labs/bird:opensuse-15.3-amd64 -build-freebsd-11-amd64: - <<: *build-base - tags: - - freebsd - - amd64 - -build-freebsd-11-i386: - <<: *build-base - tags: - - freebsd - - i386 +#build-freebsd-11-amd64: +# <<: *build-base +# tags: +# - freebsd +# - amd64 + +#build-freebsd-11-i386: +# <<: *build-base +# tags: +# - freebsd +# - i386 .pkg-deb: &pkg-deb diff --git a/client/client.c b/client/client.c index 97cf6639..934e16e0 100644 --- a/client/client.c +++ b/client/client.c @@ -50,6 +50,7 @@ static byte *server_read_pos = server_read_buf; int init = 1; /* During intial sequence */ int busy = 1; /* Executing BIRD command */ int interactive; /* Whether stdin is terminal */ +int last_code; /* Last return code */ static int num_lines, skip_input; int term_lns, term_cls; @@ -196,7 +197,7 @@ init_commands(void) { /* Initial command is finished and we want to exit */ cleanup(); - exit(0); + exit((last_code < 8000) ? 0 : 1); } input_init(); @@ -283,6 +284,8 @@ server_got_reply(char *x) if (code) PRINTF(len, "%s\n", verbose ? x : x+5); + last_code = code; + if (x[4] == ' ') { busy = 0; diff --git a/distro/pkg/rpm/bird.service b/distro/pkg/rpm/bird.service index fa203c78..26bcb8a0 100644 --- a/distro/pkg/rpm/bird.service +++ b/distro/pkg/rpm/bird.service @@ -5,8 +5,9 @@ After=network.target [Service] Type=simple +ExecStartPre=/usr/sbin/bird -p ExecStart=/usr/sbin/bird -f -u bird -g bird -ExecReload=/bin/kill -HUP $MAINPID +ExecReload=/usr/sbin/birdc configure Restart=on-failure [Install] diff --git a/doc/bird.sgml b/doc/bird.sgml index 9e9237fa..692b7d89 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -2153,6 +2153,13 @@ protocol bfd [<name>] { to configure separate BFD protocol instances for IPv4 and for IPv6 sessions. + <tag><label id="bfd-strict-bind">strict bind <m/switch/</tag> + Specify whether each BFD interface should use a separate listening + socket bound to its local address, or just use a shared listening socket + accepting all addresses. Binding to a specific address could be useful + in cases like running multiple BIRD instances on a machine, each + handling a different set of interfaces. Default: disabled. + <tag><label id="bfd-iface">interface <m/pattern/ [, <m/.../] { <m/options/ }</tag> Interface definitions allow to specify options for sessions associated with such interfaces and also may contain interface specific options. @@ -5252,7 +5259,7 @@ Note that for negated matches, value must be either zero or equal to bitmask <cf>port 1..1023,1194,3306</cf>). <tag><label id="flow-dport">dport <m/numbers-match/</tag> - Set a mating destination port numbers (e.g. <cf>dport 49151</cf>). + Set a matching destination port numbers (e.g. <cf>dport 49151</cf>). <tag><label id="flow-sport">sport <m/numbers-match/</tag> Set a matching source port numbers (e.g. <cf>sport = 0</cf>). diff --git a/filter/f-inst.c b/filter/f-inst.c index eb75ac62..7d008f4f 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -737,6 +737,7 @@ l->attrs[0].type = da.type; l->attrs[0].originated = 1; l->attrs[0].fresh = 1; + l->attrs[0].undef = 0; switch (da.type) { case EAF_TYPE_INT: @@ -85,25 +85,29 @@ ip4_classify(ip4_addr ad) u32 a = _I(ad); u32 b = a >> 24U; - if (b && b <= 0xdf) + if (b < 0xe0) { - if (b == 0x7f) + if (b == 0x00) /* 0.0.0.0/8 This network */ + return IADDR_INVALID; + + if (b == 0x7f) /* 127.0.0.0/8 Loopback address */ return IADDR_HOST | SCOPE_HOST; - else if ((b == 0x0a) || - ((a & 0xffff0000) == 0xc0a80000) || - ((a & 0xfff00000) == 0xac100000)) + + if ((b == 0x0a) || /* 10.0.0.0/8 Private range */ + ((a & 0xffff0000) == 0xc0a80000) || /* 192.168.0.0/16 Private range */ + ((a & 0xfff00000) == 0xac100000)) /* 172.16.0.0/12 Private range */ return IADDR_HOST | SCOPE_SITE; - else - return IADDR_HOST | SCOPE_UNIVERSE; + + return IADDR_HOST | SCOPE_UNIVERSE; } - if (b >= 0xe0 && b <= 0xef) + if (b < 0xf0) /* 224.0.0.0/4 Multicast address */ return IADDR_MULTICAST | SCOPE_UNIVERSE; - if (a == 0xffffffff) + if (a == 0xffffffff) /* 255.255.255.255 Broadcast address */ return IADDR_BROADCAST | SCOPE_LINK; - return IADDR_INVALID; + return IADDR_HOST | SCOPE_SITE; /* 240.0.0.0/4 Reserved / private */ } int diff --git a/nest/rt-table.c b/nest/rt-table.c index 30208f3f..fd6e1309 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -2294,7 +2294,7 @@ static void rt_prune_table(rtable *tab) { struct fib_iterator *fit = &tab->prune_fit; - int limit = 512; + int limit = 2000; struct rt_import_hook *ih; node *n, *x; diff --git a/proto/babel/babel.c b/proto/babel/babel.c index a8ca9e52..db54e4f1 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -862,14 +862,14 @@ babel_send_ihus(struct babel_iface *ifa) } static void -babel_send_hello(struct babel_iface *ifa) +babel_send_hello(struct babel_iface *ifa, uint interval) { struct babel_proto *p = ifa->proto; union babel_msg msg = {}; msg.type = BABEL_TLV_HELLO; msg.hello.seqno = ifa->hello_seqno++; - msg.hello.interval = ifa->cf->hello_interval; + msg.hello.interval = interval ?: ifa->cf->hello_interval; TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %t", ifa->ifname, msg.hello.seqno, (btime) msg.hello.interval); @@ -1577,7 +1577,7 @@ babel_iface_timer(timer *t) if (now_ >= ifa->next_hello) { - babel_send_hello(ifa); + babel_send_hello(ifa, 0); ifa->next_hello += hello_period * (1 + (now_ - ifa->next_hello) / hello_period); } @@ -1624,7 +1624,7 @@ babel_iface_start(struct babel_iface *ifa) tm_start(ifa->timer, 100 MS); ifa->up = 1; - babel_send_hello(ifa); + babel_send_hello(ifa, 0); babel_send_wildcard_retraction(ifa); babel_send_wildcard_request(ifa); babel_send_update(ifa, 0); /* Full update */ @@ -2435,6 +2435,11 @@ babel_iface_shutdown(struct babel_iface *ifa) { if (ifa->sk) { + /* + * Retract all our routes and lower the hello interval so peers' neighbour + * state expires quickly + */ + babel_send_hello(ifa, BABEL_MIN_INTERVAL); babel_send_wildcard_retraction(ifa); babel_send_queue(ifa); } diff --git a/proto/babel/packets.c b/proto/babel/packets.c index 2a6d443d..d4acc170 100644 --- a/proto/babel/packets.c +++ b/proto/babel/packets.c @@ -2010,7 +2010,7 @@ babel_auth_sign(struct babel_iface *ifa, ip_addr dest) } DBG("Added MAC signatures (%d bytes) on ifa %s for dest %I\n", - tot_len, ifa->ifname, dest); + pos - (pkt + len), ifa->ifname, dest); return pos - (pkt + len); } diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index d1e97cd5..873e2ed5 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -582,6 +582,9 @@ bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface) ifa->sk = bfd_open_tx_sk(p, local, iface); ifa->uc = 1; + if (cf->strict_bind) + ifa->rx = bfd_open_rx_sk_bound(p, local, iface); + add_tail(&p->iface_list, &ifa->n); return ifa; @@ -599,6 +602,12 @@ bfd_free_iface(struct bfd_iface *ifa) rfree(ifa->sk); } + if (ifa->rx) + { + sk_stop(ifa->rx); + rfree(ifa->rx); + } + rem_node(&ifa->n); mb_free(ifa); } @@ -1031,17 +1040,20 @@ bfd_start(struct proto *P) birdloop_enter(p->loop); - if (cf->accept_ipv4 && cf->accept_direct) - p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); + if (!cf->strict_bind) + { + if (cf->accept_ipv4 && cf->accept_direct) + p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); - if (cf->accept_ipv4 && cf->accept_multihop) - p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4); + if (cf->accept_ipv4 && cf->accept_multihop) + p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4); - if (cf->accept_ipv6 && cf->accept_direct) - p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6); + if (cf->accept_ipv6 && cf->accept_direct) + p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6); - if (cf->accept_ipv6 && cf->accept_multihop) - p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); + if (cf->accept_ipv6 && cf->accept_multihop) + p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); + } birdloop_leave(p->loop); @@ -1095,7 +1107,8 @@ bfd_reconfigure(struct proto *P, struct proto_config *c) if ((new->accept_ipv4 != old->accept_ipv4) || (new->accept_ipv6 != old->accept_ipv6) || (new->accept_direct != old->accept_direct) || - (new->accept_multihop != old->accept_multihop)) + (new->accept_multihop != old->accept_multihop) || + (new->strict_bind != old->strict_bind)) return 0; birdloop_mask_wakeups(p->loop); diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index 91fdaa60..7caf9f66 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -47,6 +47,7 @@ struct bfd_config u8 accept_ipv6; u8 accept_direct; u8 accept_multihop; + u8 strict_bind; }; struct bfd_iface_config @@ -116,6 +117,7 @@ struct bfd_iface struct bfd_proto *bfd; sock *sk; + sock *rx; u32 uc; u8 changed; }; @@ -221,6 +223,7 @@ void bfd_show_sessions(struct proto *P); /* packets.c */ void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final); sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop, int inet_version); +sock * bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa); sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa); diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y index df1cba42..70461872 100644 --- a/proto/bfd/config.Y +++ b/proto/bfd/config.Y @@ -23,7 +23,8 @@ CF_DECLS CF_KEYWORDS(BFD, MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE, INTERFACE, MULTIHOP, NEIGHBOR, DEV, LOCAL, AUTHENTICATION, - NONE, SIMPLE, METICULOUS, KEYED, MD5, SHA1, IPV4, IPV6, DIRECT) + NONE, SIMPLE, METICULOUS, KEYED, MD5, SHA1, IPV4, IPV6, DIRECT, + STRICT, BIND) %type <iface> bfd_neigh_iface %type <a> bfd_neigh_local @@ -48,6 +49,7 @@ bfd_proto_item: | INTERFACE bfd_iface | MULTIHOP bfd_multihop | NEIGHBOR bfd_neighbor + | STRICT BIND bool { BFD_CFG->strict_bind = $3; } ; bfd_proto_opts: diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index 7618e20f..5f10734c 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -366,7 +366,9 @@ bfd_rx_hook(sock *sk, uint len) if (ps > BFD_STATE_DOWN) DROP("invalid init state", ps); - uint ifindex = (sk->sport == BFD_CONTROL_PORT) ? sk->lifindex : 0; + uint ifindex = (sk->sport == BFD_CONTROL_PORT) ? + (sk->iface ? sk->iface->index : sk->lifindex) : + 0; s = bfd_find_session_by_addr(p, sk->faddr, ifindex); /* FIXME: better session matching and message */ @@ -439,6 +441,38 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) } sock * +bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa) +{ + sock *sk = sk_new(p->tpool); + sk->type = SK_UDP; + sk->saddr = local; + sk->sport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; + sk->iface = ifa; + sk->vrf = p->p.vrf; + sk->data = p; + + sk->rbsize = BFD_MAX_LEN; + sk->rx_hook = bfd_rx_hook; + sk->err_hook = bfd_err_hook; + + /* TODO: configurable ToS and priority */ + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->priority = sk_priority_control; + sk->flags = SKF_THREAD | SKF_BIND | (ifa ? SKF_TTL_RX : 0); + + if (sk_open(sk) < 0) + goto err; + + sk_start(sk); + return sk; + + err: + sk_log_error(sk, p->p.name); + rfree(sk); + return NULL; +} + +sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) { sock *sk = sk_new(p->tpool); diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 9f31c28b..25f115af 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1465,7 +1465,7 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a) #define RBH_FN(a,h) h #define RBH_REHASH bgp_rbh_rehash -#define RBH_PARAMS /8, *2, 2, 2, 8, 20 +#define RBH_PARAMS /8, *2, 2, 2, 12, 20 HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket) @@ -1607,7 +1607,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash -#define PXH_PARAMS /8, *2, 2, 2, 8, 24 +#define PXH_PARAMS /8, *2, 2, 2, 12, 24 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) @@ -1633,7 +1633,8 @@ bgp_free_prefix_table(struct bgp_channel *c) static struct bgp_prefix * bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) { - u32 hash = net_hash(net) ^ u32_hash(path_id); + /* We must use a different hash function than the rtable */ + u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id)); struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash); if (px) diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 220551f6..279c834b 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -2467,6 +2467,9 @@ bgp_show_proto_info(struct proto *P) else cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface); + if ((p->conn == &p->outgoing_conn) && (p->cf->remote_port != BGP_PORT)) + cli_msg(-1006, " Neighbor port: %u", p->cf->remote_port); + cli_msg(-1006, " Neighbor AS: %u", p->remote_as); cli_msg(-1006, " Local AS: %u", p->cf->local_as); diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index 766b8445..f1d32628 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -834,16 +834,27 @@ rpki_show_proto_info(struct proto *P) if (cache) { const char *transport_name = "---"; + uint default_port = 0; switch (cf->tr_config.type) { #if HAVE_LIBSSH - case RPKI_TR_SSH: transport_name = "SSHv2"; break; + case RPKI_TR_SSH: + transport_name = "SSHv2"; + default_port = RPKI_SSH_PORT; + break; #endif - case RPKI_TR_TCP: transport_name = "Unprotected over TCP"; break; + case RPKI_TR_TCP: + transport_name = "Unprotected over TCP"; + default_port = RPKI_TCP_PORT; + break; }; cli_msg(-1006, " Cache server: %s", cf->hostname); + + if (cf->port != default_port) + cli_msg(-1006, " Cache port: %u", cf->port); + cli_msg(-1006, " Status: %s", rpki_cache_state_to_str(cache->state)); cli_msg(-1006, " Transport: %s", transport_name); cli_msg(-1006, " Protocol version: %u", cache->version); diff --git a/proto/rpki/transport.c b/proto/rpki/transport.c index a1ac7587..81bd6dd8 100644 --- a/proto/rpki/transport.c +++ b/proto/rpki/transport.c @@ -85,6 +85,7 @@ rpki_tr_open(struct rpki_tr_sock *tr) sk->rbsize = RPKI_RX_BUFFER_SIZE; sk->tbsize = RPKI_TX_BUFFER_SIZE; sk->tos = IP_PREC_INTERNET_CONTROL; + sk->vrf = cache->p->p.vrf; if (ipa_zero(sk->daddr) && sk->host) { diff --git a/sysdep/linux/netlink.c.orig b/sysdep/linux/netlink.c.orig deleted file mode 100644 index 7cea5322..00000000 --- a/sysdep/linux/netlink.c.orig +++ /dev/null @@ -1,2179 +0,0 @@ -/* - * BIRD -- Linux Netlink Interface - * - * (c) 1999--2000 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include <alloca.h> -#include <stdio.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/socket.h> -#include <sys/uio.h> -#include <errno.h> - -#undef LOCAL_DEBUG - -#include "nest/bird.h" -#include "nest/route.h" -#include "nest/protocol.h" -#include "nest/iface.h" -#include "lib/alloca.h" -#include "sysdep/unix/unix.h" -#include "sysdep/unix/krt.h" -#include "lib/socket.h" -#include "lib/string.h" -#include "lib/hash.h" -#include "conf/conf.h" - -#include <asm/types.h> -#include <linux/if.h> -#include <linux/netlink.h> -#include <linux/rtnetlink.h> - -#ifdef HAVE_MPLS_KERNEL -#include <linux/lwtunnel.h> -#endif - -#ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */ -#define MSG_TRUNC 0x20 -#endif - -#ifndef IFA_FLAGS -#define IFA_FLAGS 8 -#endif - -#ifndef IFF_LOWER_UP -#define IFF_LOWER_UP 0x10000 -#endif - -#ifndef RTA_TABLE -#define RTA_TABLE 15 -#endif - -#ifndef RTA_VIA -#define RTA_VIA 18 -#endif - -#ifndef RTA_NEWDST -#define RTA_NEWDST 19 -#endif - -#ifndef RTA_ENCAP_TYPE -#define RTA_ENCAP_TYPE 21 -#endif - -#ifndef RTA_ENCAP -#define RTA_ENCAP 22 -#endif - -#define krt_ipv4(p) ((p)->af == AF_INET) -#define krt_ecmp6(p) ((p)->af == AF_INET6) - -const int rt_default_ecmp = 16; - -/* - * Structure nl_parse_state keeps state of received route processing. Ideally, - * we could just independently parse received Netlink messages and immediately - * propagate received routes to the rest of BIRD, but older Linux kernel (before - * version 4.11) represents and announces IPv6 ECMP routes not as one route with - * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of - * routes with the same prefix. More recent kernels work as with IPv4. - * - * Therefore, BIRD keeps currently processed route in nl_parse_state structure - * and postpones its propagation until we expect it to be final; i.e., when - * non-matching route is received or when the scan ends. When another matching - * route is received, it is merged with the already processed route to form an - * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the - * postponing is done in both cases (for simplicity). All IPv4 routes or IPv6 - * routes with RTA_MULTIPATH set are just considered non-matching. - * - * This is ignored for asynchronous notifications (every notification is handled - * as a separate route). It is not an issue for our routes, as we ignore such - * notifications anyways. But importing alien IPv6 ECMP routes does not work - * properly with older kernels. - * - * Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes - * for the same prefix. - */ - -struct nl_parse_state -{ - struct linpool *pool; - int scan; - int merge; - - net *net; - rta *attrs; - struct krt_proto *proto; - s8 new; - s8 krt_src; - u8 krt_type; - u8 krt_proto; - u32 krt_metric; - - u32 rta_flow; /* Used during parsing */ -}; - -/* - * Synchronous Netlink interface - */ - -struct nl_sock -{ - int fd; - u32 seq; - byte *rx_buffer; /* Receive buffer */ - struct nlmsghdr *last_hdr; /* Recently received packet */ - uint last_size; -}; - -#define NL_RX_SIZE 8192 - -#define NL_OP_DELETE 0 -#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL) -#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE) -#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND) - -static linpool *nl_linpool; - -static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */ -static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */ - -static void -nl_open_sock(struct nl_sock *nl) -{ - if (nl->fd < 0) - { - nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (nl->fd < 0) - die("Unable to open rtnetlink socket: %m"); - nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */ - nl->rx_buffer = xmalloc(NL_RX_SIZE); - nl->last_hdr = NULL; - nl->last_size = 0; - } -} - -static void -nl_open(void) -{ - nl_open_sock(&nl_scan); - nl_open_sock(&nl_req); -} - -static void -nl_send(struct nl_sock *nl, struct nlmsghdr *nh) -{ - struct sockaddr_nl sa; - - memset(&sa, 0, sizeof(sa)); - sa.nl_family = AF_NETLINK; - nh->nlmsg_pid = 0; - nh->nlmsg_seq = ++(nl->seq); - nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len); - if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0) - die("rtnetlink sendto: %m"); - nl->last_hdr = NULL; -} - -static void -nl_request_dump(int af, int cmd) -{ - struct { - struct nlmsghdr nh; - struct rtgenmsg g; - } req = { - .nh.nlmsg_type = cmd, - .nh.nlmsg_len = sizeof(req), - .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, - .g.rtgen_family = af - }; - nl_send(&nl_scan, &req.nh); -} - -static struct nlmsghdr * -nl_get_reply(struct nl_sock *nl) -{ - for(;;) - { - if (!nl->last_hdr) - { - struct iovec iov = { nl->rx_buffer, NL_RX_SIZE }; - struct sockaddr_nl sa; - struct msghdr m = { - .msg_name = &sa, - .msg_namelen = sizeof(sa), - .msg_iov = &iov, - .msg_iovlen = 1, - }; - int x = recvmsg(nl->fd, &m, 0); - if (x < 0) - die("nl_get_reply: %m"); - if (sa.nl_pid) /* It isn't from the kernel */ - { - DBG("Non-kernel packet\n"); - continue; - } - nl->last_size = x; - nl->last_hdr = (void *) nl->rx_buffer; - if (m.msg_flags & MSG_TRUNC) - bug("nl_get_reply: got truncated reply which should be impossible"); - } - if (NLMSG_OK(nl->last_hdr, nl->last_size)) - { - struct nlmsghdr *h = nl->last_hdr; - nl->last_hdr = NLMSG_NEXT(h, nl->last_size); - if (h->nlmsg_seq != nl->seq) - { - log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)", - h->nlmsg_seq, nl->seq); - continue; - } - return h; - } - if (nl->last_size) - log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size); - nl->last_hdr = NULL; - } -} - -static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS; - -static int -nl_error(struct nlmsghdr *h, int ignore_esrch) -{ - struct nlmsgerr *e; - int ec; - - if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) - { - log(L_WARN "Netlink: Truncated error message received"); - return ENOBUFS; - } - e = (struct nlmsgerr *) NLMSG_DATA(h); - ec = -e->error; - if (ec && !(ignore_esrch && (ec == ESRCH))) - log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec)); - return ec; -} - -static struct nlmsghdr * -nl_get_scan(void) -{ - struct nlmsghdr *h = nl_get_reply(&nl_scan); - - if (h->nlmsg_type == NLMSG_DONE) - return NULL; - if (h->nlmsg_type == NLMSG_ERROR) - { - nl_error(h, 0); - return NULL; - } - return h; -} - -static int -nl_exchange(struct nlmsghdr *pkt, int ignore_esrch) -{ - struct nlmsghdr *h; - - nl_send(&nl_req, pkt); - for(;;) - { - h = nl_get_reply(&nl_req); - if (h->nlmsg_type == NLMSG_ERROR) - break; - log(L_WARN "nl_exchange: Unexpected reply received"); - } - return nl_error(h, ignore_esrch) ? -1 : 0; -} - -/* - * Netlink attributes - */ - -static int nl_attr_len; - -static void * -nl_checkin(struct nlmsghdr *h, int lsize) -{ - nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize); - if (nl_attr_len < 0) - { - log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len); - return NULL; - } - return NLMSG_DATA(h); -} - -struct nl_want_attrs { - u8 defined:1; - u8 checksize:1; - u8 size; -}; - - -#define BIRD_IFLA_MAX (IFLA_WIRELESS+1) - -static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = { - [IFLA_IFNAME] = { 1, 0, 0 }, - [IFLA_MTU] = { 1, 1, sizeof(u32) }, - [IFLA_MASTER] = { 1, 1, sizeof(u32) }, - [IFLA_WIRELESS] = { 1, 0, 0 }, -}; - - -#define BIRD_IFA_MAX (IFA_FLAGS+1) - -static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = { - [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) }, - [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) }, - [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) }, - [IFA_FLAGS] = { 1, 1, sizeof(u32) }, -}; - -static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = { - [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) }, - [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) }, - [IFA_FLAGS] = { 1, 1, sizeof(u32) }, -}; - - -#define BIRD_RTA_MAX (RTA_ENCAP+1) - -static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = { - [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) }, - [RTA_VIA] = { 1, 0, 0 }, - [RTA_FLOW] = { 1, 1, sizeof(u32) }, - [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, - [RTA_ENCAP] = { 1, 0, 0 }, -}; - -static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = { - [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) }, - [RTA_VIA] = { 1, 0, 0 }, - [RTA_FLOW] = { 1, 1, sizeof(u32) }, - [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, - [RTA_ENCAP] = { 1, 0, 0 }, -}; - -#ifdef HAVE_MPLS_KERNEL -static struct nl_want_attrs nexthop_attr_want_mpls[BIRD_RTA_MAX] = { - [RTA_VIA] = { 1, 0, 0 }, - [RTA_NEWDST] = { 1, 0, 0 }, -}; - -static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = { - [RTA_DST] = { 1, 0, 0 }, -}; -#endif - -static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = { - [RTA_DST] = { 1, 1, sizeof(ip4_addr) }, - [RTA_OIF] = { 1, 1, sizeof(u32) }, - [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) }, - [RTA_PRIORITY] = { 1, 1, sizeof(u32) }, - [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) }, - [RTA_METRICS] = { 1, 0, 0 }, - [RTA_MULTIPATH] = { 1, 0, 0 }, - [RTA_FLOW] = { 1, 1, sizeof(u32) }, - [RTA_TABLE] = { 1, 1, sizeof(u32) }, - [RTA_VIA] = { 1, 0, 0 }, - [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, - [RTA_ENCAP] = { 1, 0, 0 }, -}; - -static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = { - [RTA_DST] = { 1, 1, sizeof(ip6_addr) }, - [RTA_SRC] = { 1, 1, sizeof(ip6_addr) }, - [RTA_IIF] = { 1, 1, sizeof(u32) }, - [RTA_OIF] = { 1, 1, sizeof(u32) }, - [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) }, - [RTA_PRIORITY] = { 1, 1, sizeof(u32) }, - [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) }, - [RTA_METRICS] = { 1, 0, 0 }, - [RTA_MULTIPATH] = { 1, 0, 0 }, - [RTA_FLOW] = { 1, 1, sizeof(u32) }, - [RTA_TABLE] = { 1, 1, sizeof(u32) }, - [RTA_VIA] = { 1, 0, 0 }, - [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, - [RTA_ENCAP] = { 1, 0, 0 }, -}; - -#ifdef HAVE_MPLS_KERNEL -static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = { - [RTA_DST] = { 1, 1, sizeof(u32) }, - [RTA_IIF] = { 1, 1, sizeof(u32) }, - [RTA_OIF] = { 1, 1, sizeof(u32) }, - [RTA_PRIORITY] = { 1, 1, sizeof(u32) }, - [RTA_METRICS] = { 1, 0, 0 }, - [RTA_MULTIPATH] = { 1, 0, 0 }, - [RTA_FLOW] = { 1, 1, sizeof(u32) }, - [RTA_TABLE] = { 1, 1, sizeof(u32) }, - [RTA_VIA] = { 1, 0, 0 }, - [RTA_NEWDST] = { 1, 0, 0 }, -}; -#endif - - -static int -nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize) -{ - int max = ksize / sizeof(struct rtattr *); - bzero(k, ksize); - - for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len)) - { - if ((a->rta_type >= max) || !want[a->rta_type].defined) - continue; - - if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size)) - { - log(L_ERR "nl_parse_attrs: Malformed attribute received"); - return 0; - } - - k[a->rta_type] = a; - } - - if (nl_attr_len) - { - log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len); - return 0; - } - - return 1; -} - -static inline u16 rta_get_u16(struct rtattr *a) -{ return *(u16 *) RTA_DATA(a); } - -static inline u32 rta_get_u32(struct rtattr *a) -{ return *(u32 *) RTA_DATA(a); } - -static inline ip4_addr rta_get_ip4(struct rtattr *a) -{ return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); } - -static inline ip6_addr rta_get_ip6(struct rtattr *a) -{ return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); } - -static inline ip_addr rta_get_ipa(struct rtattr *a) -{ - if (RTA_PAYLOAD(a) == sizeof(ip4_addr)) - return ipa_from_ip4(rta_get_ip4(a)); - else - return ipa_from_ip6(rta_get_ip6(a)); -} - -#ifdef HAVE_MPLS_KERNEL -static inline ip_addr rta_get_via(struct rtattr *a) -{ - struct rtvia *v = RTA_DATA(a); - switch(v->rtvia_family) { - case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr)); - case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr)); - } - return IPA_NONE; -} - -static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK]; -static inline int rta_get_mpls(struct rtattr *a, u32 *stack) -{ - if (!a) - return 0; - - if (RTA_PAYLOAD(a) % 4) - log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a)); - - int labels = mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack); - - if (labels < 0) - { - log(L_WARN "KRT: Too long MPLS stack received, ignoring"); - labels = 0; - } - - return labels; -} -#endif - -struct rtattr * -nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen) -{ - uint pos = NLMSG_ALIGN(h->nlmsg_len); - uint len = RTA_LENGTH(dlen); - - if (pos + len > bufsize) - bug("nl_add_attr: packet buffer overflow"); - - struct rtattr *a = (struct rtattr *)((char *)h + pos); - a->rta_type = code; - a->rta_len = len; - h->nlmsg_len = pos + len; - - if (dlen > 0) - memcpy(RTA_DATA(a), data, dlen); - - return a; -} - -static inline struct rtattr * -nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code) -{ - return nl_add_attr(h, bufsize, code, NULL, 0); -} - -static inline void -nl_close_attr(struct nlmsghdr *h, struct rtattr *a) -{ - a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a; -} - -static inline void -nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data) -{ - nl_add_attr(h, bufsize, code, &data, 2); -} - -static inline void -nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data) -{ - nl_add_attr(h, bufsize, code, &data, 4); -} - -static inline void -nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4) -{ - ip4 = ip4_hton(ip4); - nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4)); -} - -static inline void -nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6) -{ - ip6 = ip6_hton(ip6); - nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6)); -} - -static inline void -nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa) -{ - if (ipa_is_ip4(ipa)) - nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa)); - else - nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa)); -} - -#ifdef HAVE_MPLS_KERNEL -static inline void -nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack) -{ - char buf[len*4]; - mpls_put(buf, len, stack); - nl_add_attr(h, bufsize, code, buf, len*4); -} - -static inline void -nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack) -{ - nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS); - - struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP); - nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack); - nl_close_attr(h, nest); -} - -static inline void -nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa) -{ - struct rtvia *via = alloca(sizeof(struct rtvia) + 16); - - if (ipa_is_ip4(ipa)) - { - via->rtvia_family = AF_INET; - put_ip4(via->rtvia_addr, ipa_to_ip4(ipa)); - nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 4); - } - else - { - via->rtvia_family = AF_INET6; - put_ip6(via->rtvia_addr, ipa_to_ip6(ipa)); - nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 16); - } -} -#endif - -static inline struct rtnexthop * -nl_open_nexthop(struct nlmsghdr *h, uint bufsize) -{ - uint pos = NLMSG_ALIGN(h->nlmsg_len); - uint len = RTNH_LENGTH(0); - - if (pos + len > bufsize) - bug("nl_open_nexthop: packet buffer overflow"); - - h->nlmsg_len = pos + len; - - return (void *)h + pos; -} - -static inline void -nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh) -{ - nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh; -} - -static inline void -nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED) -{ -#ifdef HAVE_MPLS_KERNEL - if (nh->labels > 0) - if (af == AF_MPLS) - nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label); - else - nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label); - - if (ipa_nonzero(nh->gw)) - { - if (af == (ipa_is_ip4(nh->gw) ? AF_INET : AF_INET6)) - nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw); - else - nl_add_attr_via(h, bufsize, nh->gw); - } -#else - - if (ipa_nonzero(nh->gw)) - nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw); -#endif -} - -static void -nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, ea_list *eattrs) -{ - struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH); - eattr *flow = ea_find(eattrs, EA_KRT_REALM); - - for (; nh; nh = nh->next) - { - struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize); - - rtnh->rtnh_flags = 0; - rtnh->rtnh_hops = nh->weight; - rtnh->rtnh_ifindex = nh->iface->index; - - nl_add_nexthop(h, bufsize, nh, af); - - if (nh->flags & RNF_ONLINK) - rtnh->rtnh_flags |= RTNH_F_ONLINK; - - /* Our KRT_REALM is per-route, but kernel RTA_FLOW is per-nexthop. - Therefore, we need to attach the same attribute to each nexthop. */ - if (flow) - nl_add_attr_u32(h, bufsize, RTA_FLOW, flow->u.data); - - nl_close_nexthop(h, rtnh); - } - - nl_close_attr(h, a); -} - -static struct nexthop * -nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src) -{ - struct rtattr *a[BIRD_RTA_MAX]; - struct rtnexthop *nh = RTA_DATA(ra); - struct nexthop *rv, *first, **last; - unsigned len = RTA_PAYLOAD(ra); - - first = NULL; - last = &first; - - while (len) - { - /* Use RTNH_OK(nh,len) ?? */ - if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) - goto err; - - if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) - goto next; - - *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE); - last = &(rv->next); - - rv->weight = nh->rtnh_hops; - rv->iface = if_find_by_index(nh->rtnh_ifindex); - if (!rv->iface) - { - log(L_ERR "KRT: Received route %N with unknown ifindex %u", n, nh->rtnh_ifindex); - return NULL; - } - - /* Nonexistent RTNH_PAYLOAD ?? */ - nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0); - switch (af) - { - case AF_INET: - if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a))) - goto err; - break; - - case AF_INET6: - if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a))) - goto err; - break; - -#ifdef HAVE_MPLS_KERNEL - case AF_MPLS: - if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want_mpls, a, sizeof(a))) - goto err; - - if (a[RTA_NEWDST]) - rv->labels = rta_get_mpls(a[RTA_NEWDST], rv->label); - - break; -#endif - - default: - goto err; - } - - if (a[RTA_GATEWAY]) - rv->gw = rta_get_ipa(a[RTA_GATEWAY]); - - if (a[RTA_FLOW]) - s->rta_flow = rta_get_u32(a[RTA_FLOW]); - -#ifdef HAVE_MPLS_KERNEL - if (a[RTA_VIA]) - rv->gw = rta_get_via(a[RTA_VIA]); -#endif - - if (ipa_nonzero(rv->gw)) - { - if (nh->rtnh_flags & RTNH_F_ONLINK) - rv->flags |= RNF_ONLINK; - - neighbor *nbr; - nbr = neigh_find(&p->p, rv->gw, rv->iface, - (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0); - if (!nbr || (nbr->scope == SCOPE_HOST)) - { - log(L_ERR "KRT: Received route %N with strange next-hop %I", n, rv->gw); - return NULL; - } - } - -#ifdef HAVE_MPLS_KERNEL - if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE]) - { - if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) - { - log(L_WARN "KRT: Received route %N with unknown encapsulation method %d", - n, rta_get_u16(a[RTA_ENCAP_TYPE])); - return NULL; - } - - struct rtattr *enca[BIRD_RTA_MAX]; - nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); - nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); - rv->labels = rta_get_mpls(enca[RTA_DST], rv->label); - } -#endif - - next: - len -= NLMSG_ALIGN(nh->rtnh_len); - nh = RTNH_NEXT(nh); - } - - /* Ensure nexthops are sorted to satisfy nest invariant */ - if (!nexthop_is_sorted(first)) - first = nexthop_sort(first); - - return first; - -err: - log(L_ERR "KRT: Received strange multipath route %N", n); - return NULL; -} - -static void -nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max) -{ - struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS); - int t; - - for (t = 1; t < max; t++) - if (metrics[0] & (1 << t)) - nl_add_attr_u32(h, bufsize, t, metrics[t]); - - nl_close_attr(h, a); -} - -static int -nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max) -{ - struct rtattr *a = RTA_DATA(hdr); - int len = RTA_PAYLOAD(hdr); - - metrics[0] = 0; - for (; RTA_OK(a, len); a = RTA_NEXT(a, len)) - { - if (a->rta_type == RTA_UNSPEC) - continue; - - if (a->rta_type >= max) - continue; - - if (RTA_PAYLOAD(a) != 4) - return -1; - - metrics[0] |= 1 << a->rta_type; - metrics[a->rta_type] = rta_get_u32(a); - } - - if (len > 0) - return -1; - - return 0; -} - - -/* - * Scanning of interfaces - */ - -static void -nl_parse_link(struct nlmsghdr *h, int scan) -{ - struct ifinfomsg *i; - struct rtattr *a[BIRD_IFLA_MAX]; - int new = h->nlmsg_type == RTM_NEWLINK; - struct iface f = {}; - struct iface *ifi; - char *name; - u32 mtu, master = 0; - uint fl; - - if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a))) - return; - if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU]) - { - /* - * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come - * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists. - * We simply ignore all such messages with IFLA_WIRELESS without notice. - */ - - if (a[IFLA_WIRELESS]) - return; - - log(L_ERR "KIF: Malformed message received"); - return; - } - - name = RTA_DATA(a[IFLA_IFNAME]); - mtu = rta_get_u32(a[IFLA_MTU]); - - if (a[IFLA_MASTER]) - master = rta_get_u32(a[IFLA_MASTER]); - - ifi = if_find_by_index(i->ifi_index); - if (!new) - { - DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name); - if (!ifi) - return; - - if_delete(ifi); - } - else - { - DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags); - if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1)) - if_delete(ifi); - - strncpy(f.name, name, sizeof(f.name)-1); - f.index = i->ifi_index; - f.mtu = mtu; - - f.master_index = master; - f.master = if_find_by_index(master); - - fl = i->ifi_flags; - if (fl & IFF_UP) - f.flags |= IF_ADMIN_UP; - if (fl & IFF_LOWER_UP) - f.flags |= IF_LINK_UP; - if (fl & IFF_LOOPBACK) /* Loopback */ - f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE; - else if (fl & IFF_POINTOPOINT) /* PtP */ - f.flags |= IF_MULTICAST; - else if (fl & IFF_BROADCAST) /* Broadcast */ - f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST; - else - f.flags |= IF_MULTIACCESS; /* NBMA */ - - if (fl & IFF_MULTICAST) - f.flags |= IF_MULTICAST; - - ifi = if_update(&f); - - if (!scan) - if_end_partial_update(ifi); - } -} - -static void -nl_parse_addr4(struct ifaddrmsg *i, int scan, int new) -{ - struct rtattr *a[BIRD_IFA_MAX]; - struct iface *ifi; - u32 ifa_flags; - int scope; - - if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a))) - return; - - if (!a[IFA_LOCAL]) - { - log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)"); - return; - } - if (!a[IFA_ADDRESS]) - { - log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)"); - return; - } - - ifi = if_find_by_index(i->ifa_index); - if (!ifi) - { - log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index); - return; - } - - if (a[IFA_FLAGS]) - ifa_flags = rta_get_u32(a[IFA_FLAGS]); - else - ifa_flags = i->ifa_flags; - - struct ifa ifa; - bzero(&ifa, sizeof(ifa)); - ifa.iface = ifi; - if (ifa_flags & IFA_F_SECONDARY) - ifa.flags |= IA_SECONDARY; - - ifa.ip = rta_get_ipa(a[IFA_LOCAL]); - - if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH) - { - log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen); - new = 0; - } - if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH) - { - ifa.brd = rta_get_ipa(a[IFA_ADDRESS]); - net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen); - - /* It is either a host address or a peer address */ - if (ipa_equal(ifa.ip, ifa.brd)) - ifa.flags |= IA_HOST; - else - { - ifa.flags |= IA_PEER; - ifa.opposite = ifa.brd; - } - } - else - { - net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen); - net_normalize(&ifa.prefix); - - if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1) - ifa.opposite = ipa_opposite_m1(ifa.ip); - - if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2) - ifa.opposite = ipa_opposite_m2(ifa.ip); - - if (ifi->flags & IF_BROADCAST) - { - /* If kernel offers us a broadcast address, we trust it */ - if (a[IFA_BROADCAST]) - ifa.brd = ipa_from_ip4(rta_get_ip4(a[IFA_BROADCAST])); - /* Otherwise we create one (except for /31) */ - else if (i->ifa_prefixlen < (IP4_MAX_PREFIX_LENGTH - 1)) - ifa.brd = ipa_from_ip4(ip4_or(ipa_to_ip4(ifa.ip), - ip4_not(ip4_mkmask(i->ifa_prefixlen)))); - } - } - - scope = ipa_classify(ifa.ip); - if (scope < 0) - { - log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name); - return; - } - ifa.scope = scope & IADDR_SCOPE_MASK; - - DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n", - ifi->index, ifi->name, - new ? "added" : "removed", - ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite); - - if (new) - ifa_update(&ifa); - else - ifa_delete(&ifa); - - if (!scan) - if_end_partial_update(ifi); -} - -static void -nl_parse_addr6(struct ifaddrmsg *i, int scan, int new) -{ - struct rtattr *a[BIRD_IFA_MAX]; - struct iface *ifi; - u32 ifa_flags; - int scope; - - if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a))) - return; - - if (!a[IFA_ADDRESS]) - { - log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)"); - return; - } - - ifi = if_find_by_index(i->ifa_index); - if (!ifi) - { - log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index); - return; - } - - if (a[IFA_FLAGS]) - ifa_flags = rta_get_u32(a[IFA_FLAGS]); - else - ifa_flags = i->ifa_flags; - - struct ifa ifa; - bzero(&ifa, sizeof(ifa)); - ifa.iface = ifi; - if (ifa_flags & IFA_F_SECONDARY) - ifa.flags |= IA_SECONDARY; - - /* Ignore tentative addresses silently */ - if (ifa_flags & IFA_F_TENTATIVE) - return; - - /* IFA_LOCAL can be unset for IPv6 interfaces */ - ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]); - - if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH) - { - log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen); - new = 0; - } - if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH) - { - ifa.brd = rta_get_ipa(a[IFA_ADDRESS]); - net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen); - - /* It is either a host address or a peer address */ - if (ipa_equal(ifa.ip, ifa.brd)) - ifa.flags |= IA_HOST; - else - { - ifa.flags |= IA_PEER; - ifa.opposite = ifa.brd; - } - } - else - { - net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen); - net_normalize(&ifa.prefix); - - if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1) - ifa.opposite = ipa_opposite_m1(ifa.ip); - } - - scope = ipa_classify(ifa.ip); - if (scope < 0) - { - log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name); - return; - } - ifa.scope = scope & IADDR_SCOPE_MASK; - - DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n", - ifi->index, ifi->name, - new ? "added" : "removed", - ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite); - - if (new) - ifa_update(&ifa); - else - ifa_delete(&ifa); - - if (!scan) - if_end_partial_update(ifi); -} - -static void -nl_parse_addr(struct nlmsghdr *h, int scan) -{ - struct ifaddrmsg *i; - - if (!(i = nl_checkin(h, sizeof(*i)))) - return; - - int new = (h->nlmsg_type == RTM_NEWADDR); - - switch (i->ifa_family) - { - case AF_INET: - return nl_parse_addr4(i, scan, new); - - case AF_INET6: - return nl_parse_addr6(i, scan, new); - } -} - -void -kif_do_scan(struct kif_proto *p UNUSED) -{ - struct nlmsghdr *h; - - if_start_update(); - - nl_request_dump(AF_UNSPEC, RTM_GETLINK); - while (h = nl_get_scan()) - if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK) - nl_parse_link(h, 1); - else - log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type); - - /* Re-resolve master interface for slaves */ - struct iface *i; - WALK_LIST(i, iface_list) - if (i->master_index) - { - struct iface f = { - .flags = i->flags, - .mtu = i->mtu, - .index = i->index, - .master_index = i->master_index, - .master = if_find_by_index(i->master_index) - }; - - if (f.master != i->master) - { - memcpy(f.name, i->name, sizeof(f.name)); - if_update(&f); - } - } - - nl_request_dump(AF_INET, RTM_GETADDR); - while (h = nl_get_scan()) - if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) - nl_parse_addr(h, 1); - else - log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type); - - nl_request_dump(AF_INET6, RTM_GETADDR); - while (h = nl_get_scan()) - if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) - nl_parse_addr(h, 1); - else - log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type); - - if_end_update(); -} - -/* - * Routes - */ - -static inline u32 -krt_table_id(struct krt_proto *p) -{ - return KRT_CF->sys.table_id; -} - -static HASH(struct krt_proto) nl_table_map; - -#define RTH_KEY(p) p->af, krt_table_id(p) -#define RTH_NEXT(p) p->sys.hash_next -#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2 -#define RTH_FN(a,i) a ^ u32_hash(i) - -#define RTH_REHASH rth_rehash -#define RTH_PARAMS /8, *2, 2, 2, 6, 20 - -HASH_DEFINE_REHASH_FN(RTH, struct krt_proto) - -int -krt_capable(rte *e) -{ - rta *a = e->attrs; - - switch (a->dest) - { - case RTD_UNICAST: - case RTD_BLACKHOLE: - case RTD_UNREACHABLE: - case RTD_PROHIBIT: - return 1; - - default: - return 0; - } -} - -static inline int -nh_bufsize(struct nexthop *nh) -{ - int rv = 0; - for (; nh != NULL; nh = nh->next) - rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr))); - return rv; -} - -static int -nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) -{ - eattr *ea; - net *net = e->net; - rta *a = e->attrs; - ea_list *eattrs = a->eattrs; - int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh)); - u32 priority = 0; - - struct { - struct nlmsghdr h; - struct rtmsg r; - char buf[0]; - } *r; - - int rsize = sizeof(*r) + bufsize; - r = alloca(rsize); - - DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op); - - bzero(&r->h, sizeof(r->h)); - bzero(&r->r, sizeof(r->r)); - r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE; - r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); - r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK; - - r->r.rtm_family = p->af; - r->r.rtm_dst_len = net_pxlen(net->n.addr); - r->r.rtm_protocol = RTPROT_BIRD; - r->r.rtm_scope = RT_SCOPE_NOWHERE; -#ifdef HAVE_MPLS_KERNEL - if (p->af == AF_MPLS) - { - /* - * Kernel MPLS code is a bit picky. We must: - * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE) - * 2) Never use RTA_PRIORITY - */ - - u32 label = net_mpls(net->n.addr); - nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label); - r->r.rtm_scope = RT_SCOPE_UNIVERSE; - r->r.rtm_type = RTN_UNICAST; - } - else -#endif - { - nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr)); - - /* Add source address for IPv6 SADR routes */ - if (net->n.addr->type == NET_IP6_SADR) - { - net_addr_ip6_sadr *a = (void *) &net->n.addr; - nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix); - r->r.rtm_src_len = a->src_pxlen; - } - } - - /* - * Strange behavior for RTM_DELROUTE: - * 1) rtm_family is ignored in IPv6, works for IPv4 - * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6) - * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard - */ - - if (krt_table_id(p) < 256) - r->r.rtm_table = krt_table_id(p); - else - nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p)); - - if (p->af == AF_MPLS) - priority = 0; - else if (a->source == RTS_DUMMY) - priority = e->u.krt.metric; - else if (KRT_CF->sys.metric) - priority = KRT_CF->sys.metric; - else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC))) - priority = ea->u.data; - - if (priority) - nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority); - - /* For route delete, we do not specify remaining route attributes */ - if (op == NL_OP_DELETE) - goto dest; - - /* Default scope is LINK for device routes, UNIVERSE otherwise */ - if (p->af == AF_MPLS) - r->r.rtm_scope = RT_SCOPE_UNIVERSE; - else if (ea = ea_find(eattrs, EA_KRT_SCOPE)) - r->r.rtm_scope = ea->u.data; - else - r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; - - if (ea = ea_find(eattrs, EA_KRT_PREFSRC)) - nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); - - if (ea = ea_find(eattrs, EA_KRT_REALM)) - nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data); - - - u32 metrics[KRT_METRICS_MAX]; - metrics[0] = 0; - - struct ea_walk_state ews = { .eattrs = eattrs }; - while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX)) - { - int id = ea->id - EA_KRT_METRICS; - metrics[0] |= 1 << id; - metrics[id] = ea->u.data; - } - - if (metrics[0]) - nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX); - - -dest: - switch (dest) - { - case RTD_UNICAST: - r->r.rtm_type = RTN_UNICAST; - if (nh->next && !krt_ecmp6(p)) - nl_add_multipath(&r->h, rsize, nh, p->af, eattrs); - else - { - nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index); - nl_add_nexthop(&r->h, rsize, nh, p->af); - - if (nh->flags & RNF_ONLINK) - r->r.rtm_flags |= RTNH_F_ONLINK; - } - break; - case RTD_BLACKHOLE: - r->r.rtm_type = RTN_BLACKHOLE; - break; - case RTD_UNREACHABLE: - r->r.rtm_type = RTN_UNREACHABLE; - break; - case RTD_PROHIBIT: - r->r.rtm_type = RTN_PROHIBIT; - break; - case RTD_NONE: - break; - default: - bug("krt_capable inconsistent with nl_send_route"); - } - - /* Ignore missing for DELETE */ - return nl_exchange(&r->h, (op == NL_OP_DELETE)); -} - -static inline int -nl_add_rte(struct krt_proto *p, rte *e) -{ - rta *a = e->attrs; - int err = 0; - - if (krt_ecmp6(p) && a->nh.next) - { - struct nexthop *nh = &(a->nh); - - err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh); - if (err < 0) - return err; - - for (nh = nh->next; nh; nh = nh->next) - err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh); - - return err; - } - - return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh)); -} - -static inline int -nl_delete_rte(struct krt_proto *p, rte *e) -{ - int err = 0; - - /* For IPv6, we just repeatedly request DELETE until we get error */ - do - err = nl_send_route(p, e, NL_OP_DELETE, RTD_NONE, NULL); - while (krt_ecmp6(p) && !err); - - return err; -} - -static inline int -nl_replace_rte(struct krt_proto *p, rte *e) -{ - rta *a = e->attrs; - return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh)); -} - - -void -krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old) -{ - int err = 0; - - /* - * We use NL_OP_REPLACE for IPv4, it has an issue with not checking for - * matching rtm_protocol, but that is OK when dedicated priority is used. - * - * We do not use NL_OP_REPLACE for IPv6, as it has broken semantics for ECMP - * and with some kernel versions ECMP replace crashes kernel. Would need more - * testing and checks for kernel versions. - * - * For IPv6, we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the - * old route value, so we do not try to optimize IPv6 ECMP reconfigurations. - */ - - if (krt_ipv4(p) && old && new) - { - err = nl_replace_rte(p, new); - } - else - { - if (old) - nl_delete_rte(p, old); - - if (new) - err = nl_add_rte(p, new); - } - - if (new) - { - if (err < 0) - bmap_clear(&p->sync_map, new->id); - else - bmap_set(&p->sync_map, new->id); - } -} - -static int -nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family) -{ - /* Route merging is used for IPv6 scans */ - if (!s->scan || (rtm_family != AF_INET6)) - return 0; - - /* Saved and new route must have same network, proto/table, and priority */ - if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority)) - return 0; - - /* Both must be regular unicast routes */ - if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST)) - return 0; - - return 1; -} - -static void -nl_announce_route(struct nl_parse_state *s) -{ - rte *e = rte_get_temp(s->attrs); - e->net = s->net; - e->u.krt.src = s->krt_src; - e->u.krt.proto = s->krt_proto; - e->u.krt.seen = 0; - e->u.krt.best = 0; - e->u.krt.metric = s->krt_metric; - - if (s->scan) - krt_got_route(s->proto, e); - else - krt_got_route_async(s->proto, e, s->new); - - s->net = NULL; - s->attrs = NULL; - s->proto = NULL; - lp_flush(s->pool); -} - -static inline void -nl_parse_begin(struct nl_parse_state *s, int scan) -{ - memset(s, 0, sizeof (struct nl_parse_state)); - s->pool = nl_linpool; - s->scan = scan; -} - -static inline void -nl_parse_end(struct nl_parse_state *s) -{ - if (s->net) - nl_announce_route(s); -} - - -#define SKIP0(ARG, ...) do { DBG("KRT: Ignoring route - " ARG, ##__VA_ARGS__); return; } while(0) -#define SKIP(ARG, ...) do { DBG("KRT: Ignoring route %N - " ARG, &dst, ##__VA_ARGS__); return; } while(0) - -static void -nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) -{ - struct krt_proto *p; - struct rtmsg *i; - struct rtattr *a[BIRD_RTA_MAX]; - int new = h->nlmsg_type == RTM_NEWROUTE; - - net_addr dst, src = {}; - u32 oif = ~0; - u32 table_id; - u32 priority = 0; - u32 def_scope = RT_SCOPE_UNIVERSE; - int krt_src; - - if (!(i = nl_checkin(h, sizeof(*i)))) - return; - - switch (i->rtm_family) - { - case AF_INET: - if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a))) - return; - - if (a[RTA_DST]) - net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len); - else - net_fill_ip4(&dst, IP4_NONE, 0); - break; - - case AF_INET6: - if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a))) - return; - - if (a[RTA_DST]) - net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len); - else - net_fill_ip6(&dst, IP6_NONE, 0); - - if (a[RTA_SRC]) - net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len); - else - net_fill_ip6(&src, IP6_NONE, 0); - break; - -#ifdef HAVE_MPLS_KERNEL - case AF_MPLS: - if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a))) - return; - - if (!a[RTA_DST]) - SKIP0("MPLS route without RTA_DST\n"); - - if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1) - SKIP0("MPLS route with multi-label RTA_DST\n"); - - net_fill_mpls(&dst, rta_mpls_stack[0]); - break; -#endif - - default: - return; - } - - if (a[RTA_OIF]) - oif = rta_get_u32(a[RTA_OIF]); - - if (a[RTA_TABLE]) - table_id = rta_get_u32(a[RTA_TABLE]); - else - table_id = i->rtm_table; - - if (i->rtm_flags & RTM_F_CLONED) - SKIP("cloned\n"); - - /* Do we know this table? */ - p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id); - if (!p) - SKIP("unknown table %u\n", table_id); - - if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR)) - SKIP("src prefix for non-SADR channel\n"); - - if (a[RTA_IIF]) - SKIP("IIF set\n"); - - if (i->rtm_tos != 0) /* We don't support TOS */ - SKIP("TOS %02x\n", i->rtm_tos); - - if (s->scan && !new) - SKIP("RTM_DELROUTE in scan\n"); - - if (a[RTA_PRIORITY]) - priority = rta_get_u32(a[RTA_PRIORITY]); - - int c = net_classify(&dst); - if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) - SKIP("strange class/scope\n"); - - switch (i->rtm_protocol) - { - case RTPROT_UNSPEC: - SKIP("proto unspec\n"); - - case RTPROT_REDIRECT: - krt_src = KRT_SRC_REDIRECT; - break; - - case RTPROT_KERNEL: - krt_src = KRT_SRC_KERNEL; - return; - - case RTPROT_BIRD: - if (!s->scan) - SKIP("echo\n"); - krt_src = KRT_SRC_BIRD; - break; - - case RTPROT_BOOT: - default: - krt_src = KRT_SRC_ALIEN; - } - - net_addr *n = &dst; - if (p->p.net_type == NET_IP6_SADR) - { - n = alloca(sizeof(net_addr_ip6_sadr)); - net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst), - net6_prefix(&src), net6_pxlen(&src)); - } - - net *net = net_get(p->p.main_channel->table, n); - - if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family)) - nl_announce_route(s); - - rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE); - ra->src = p->p.main_source; - ra->source = RTS_INHERIT; - ra->scope = SCOPE_UNIVERSE; - - if (a[RTA_FLOW]) - s->rta_flow = rta_get_u32(a[RTA_FLOW]); - else - s->rta_flow = 0; - - switch (i->rtm_type) - { - case RTN_UNICAST: - ra->dest = RTD_UNICAST; - - if (a[RTA_MULTIPATH]) - { - struct nexthop *nh = nl_parse_multipath(s, p, n, a[RTA_MULTIPATH], i->rtm_family, krt_src); - if (!nh) - SKIP("strange RTA_MULTIPATH\n"); - - nexthop_link(ra, nh); - break; - } - - if ((i->rtm_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) - SKIP("ignore RTNH_F_DEAD\n"); - - ra->nh.iface = if_find_by_index(oif); - if (!ra->nh.iface) - { - log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif); - return; - } - - if (a[RTA_GATEWAY]) - ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]); - -#ifdef HAVE_MPLS_KERNEL - if (a[RTA_VIA]) - ra->nh.gw = rta_get_via(a[RTA_VIA]); -#endif - - if (ipa_nonzero(ra->nh.gw)) - { - /* Silently skip strange 6to4 routes */ - const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96); - if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit)) - return; - - if (i->rtm_flags & RTNH_F_ONLINK) - ra->nh.flags |= RNF_ONLINK; - - neighbor *nbr; - nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface, - (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); - if (!nbr || (nbr->scope == SCOPE_HOST)) - { - log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, - ra->nh.gw); - return; - } - } - - break; - case RTN_BLACKHOLE: - ra->dest = RTD_BLACKHOLE; - break; - case RTN_UNREACHABLE: - ra->dest = RTD_UNREACHABLE; - break; - case RTN_PROHIBIT: - ra->dest = RTD_PROHIBIT; - break; - /* FIXME: What about RTN_THROW? */ - default: - SKIP("type %d\n", i->rtm_type); - return; - } - -#ifdef HAVE_MPLS_KERNEL - if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next) - ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label); - - if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next) - { - switch (rta_get_u16(a[RTA_ENCAP_TYPE])) - { - case LWTUNNEL_ENCAP_MPLS: - { - struct rtattr *enca[BIRD_RTA_MAX]; - nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); - nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); - ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label); - break; - } - default: - SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE])); - break; - } - } -#endif - - if (i->rtm_scope != def_scope) - { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_SCOPE; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_INT; - ea->attrs[0].u.data = i->rtm_scope; - } - - if (a[RTA_PREFSRC]) - { - ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); - - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_PREFSRC; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_IP_ADDRESS; - - struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps)); - ad->length = sizeof(ps); - memcpy(ad->data, &ps, sizeof(ps)); - - ea->attrs[0].u.ptr = ad; - } - - /* Can be set per-route or per-nexthop */ - if (s->rta_flow) - { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_REALM; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_INT; - ea->attrs[0].u.data = s->rta_flow; - } - - if (a[RTA_METRICS]) - { - u32 metrics[KRT_METRICS_MAX]; - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr)); - int t, n = 0; - - if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0) - { - log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr); - return; - } - - for (t = 1; t < KRT_METRICS_MAX; t++) - if (metrics[0] & (1 << t)) - { - ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t); - ea->attrs[n].flags = 0; - ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */ - ea->attrs[n].u.data = metrics[t]; - n++; - } - - if (n > 0) - { - ea->next = ra->eattrs; - ea->flags = EALF_SORTED; - ea->count = n; - ra->eattrs = ea; - } - } - - /* - * Ideally, now we would send the received route to the rest of kernel code. - * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we - * postpone it and merge next hops until the end of the sequence. Note that - * when doing merging of next hops, we expect the new route to be unipath. - * Otherwise, we ignore additional next hops in nexthop_insert(). - */ - - if (!s->net) - { - /* Store the new route */ - s->net = net; - s->attrs = ra; - s->proto = p; - s->new = new; - s->krt_src = krt_src; - s->krt_type = i->rtm_type; - s->krt_proto = i->rtm_protocol; - s->krt_metric = priority; - } - else - { - /* Merge next hops with the stored route */ - rta *oa = s->attrs; - - struct nexthop *nhs = &oa->nh; - nexthop_insert(&nhs, &ra->nh); - - /* Perhaps new nexthop is inserted at the first position */ - if (nhs == &ra->nh) - { - /* Swap rtas */ - s->attrs = ra; - - /* Keep old eattrs */ - ra->eattrs = oa->eattrs; - } - } -} - -void -krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */ -{ - struct nlmsghdr *h; - struct nl_parse_state s; - - nl_parse_begin(&s, 1); - nl_request_dump(AF_UNSPEC, RTM_GETROUTE); - while (h = nl_get_scan()) - if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) - nl_parse_route(&s, h); - else - log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); - nl_parse_end(&s); -} - -/* - * Asynchronous Netlink interface - */ - -static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */ -static byte *nl_async_rx_buffer; /* Receive buffer */ - -static void -nl_async_msg(struct nlmsghdr *h) -{ - struct nl_parse_state s; - - switch (h->nlmsg_type) - { - case RTM_NEWROUTE: - case RTM_DELROUTE: - DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type); - nl_parse_begin(&s, 0); - nl_parse_route(&s, h); - nl_parse_end(&s); - break; - case RTM_NEWLINK: - case RTM_DELLINK: - DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type); - if (kif_proto) - nl_parse_link(h, 0); - break; - case RTM_NEWADDR: - case RTM_DELADDR: - DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type); - if (kif_proto) - nl_parse_addr(h, 0); - break; - default: - DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type); - } -} - -static int -nl_async_hook(sock *sk, uint size UNUSED) -{ - struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE }; - struct sockaddr_nl sa; - struct msghdr m = { - .msg_name = &sa, - .msg_namelen = sizeof(sa), - .msg_iov = &iov, - .msg_iovlen = 1, - }; - struct nlmsghdr *h; - int x; - uint len; - - x = recvmsg(sk->fd, &m, 0); - if (x < 0) - { - if (errno == ENOBUFS) - { - /* - * Netlink reports some packets have been thrown away. - * One day we might react to it by asking for route table - * scan in near future. - */ - log(L_WARN "Kernel dropped some netlink messages, will resync on next scan."); - return 1; /* More data are likely to be ready */ - } - else if (errno != EWOULDBLOCK) - log(L_ERR "Netlink recvmsg: %m"); - return 0; - } - if (sa.nl_pid) /* It isn't from the kernel */ - { - DBG("Non-kernel packet\n"); - return 1; - } - h = (void *) nl_async_rx_buffer; - len = x; - if (m.msg_flags & MSG_TRUNC) - { - log(L_WARN "Netlink got truncated asynchronous message"); - return 1; - } - while (NLMSG_OK(h, len)) - { - nl_async_msg(h); - h = NLMSG_NEXT(h, len); - } - if (len) - log(L_WARN "nl_async_hook: Found packet remnant of size %d", len); - return 1; -} - -static void -nl_async_err_hook(sock *sk, int e UNUSED) -{ - nl_async_hook(sk, 0); -} - -static void -nl_open_async(void) -{ - sock *sk; - struct sockaddr_nl sa; - int fd; - - if (nl_async_sk) - return; - - DBG("KRT: Opening async netlink socket\n"); - - fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (fd < 0) - { - log(L_ERR "Unable to open asynchronous rtnetlink socket: %m"); - return; - } - - bzero(&sa, sizeof(sa)); - sa.nl_family = AF_NETLINK; - sa.nl_groups = RTMGRP_LINK | - RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE | - RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE; - - if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) - { - log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m"); - close(fd); - return; - } - - nl_async_rx_buffer = xmalloc(NL_RX_SIZE); - - sk = nl_async_sk = sk_new(krt_pool); - sk->type = SK_MAGIC; - sk->rx_hook = nl_async_hook; - sk->err_hook = nl_async_err_hook; - sk->fd = fd; - if (sk_open(sk) < 0) - bug("Netlink: sk_open failed"); -} - - -/* - * Interface to the UNIX krt module - */ - -void -krt_sys_io_init(void) -{ - nl_linpool = lp_new_default(krt_pool); - HASH_INIT(nl_table_map, krt_pool, 6); -} - -int -krt_sys_start(struct krt_proto *p) -{ - struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p)); - - if (old) - { - log(L_ERR "%s: Kernel table %u already registered by %s", - p->p.name, krt_table_id(p), old->p.name); - return 0; - } - - HASH_INSERT2(nl_table_map, RTH, krt_pool, p); - - nl_open(); - nl_open_async(); - - return 1; -} - -void -krt_sys_shutdown(struct krt_proto *p) -{ - HASH_REMOVE2(nl_table_map, RTH, krt_pool, p); -} - -int -krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o) -{ - return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric); -} - -void -krt_sys_init_config(struct krt_config *cf) -{ - cf->sys.table_id = RT_TABLE_MAIN; - cf->sys.metric = 32; -} - -void -krt_sys_copy_config(struct krt_config *d, struct krt_config *s) -{ - d->sys.table_id = s->sys.table_id; - d->sys.metric = s->sys.metric; -} - -static const char *krt_metrics_names[KRT_METRICS_MAX] = { - NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss", - "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack" -}; - -static const char *krt_features_names[KRT_FEATURES_MAX] = { - "ecn", NULL, NULL, "allfrag" -}; - -int -krt_sys_get_attr(const eattr *a, byte *buf, int buflen UNUSED) -{ - switch (a->id) - { - case EA_KRT_PREFSRC: - bsprintf(buf, "prefsrc"); - return GA_NAME; - - case EA_KRT_REALM: - bsprintf(buf, "realm"); - return GA_NAME; - - case EA_KRT_SCOPE: - bsprintf(buf, "scope"); - return GA_NAME; - - case EA_KRT_LOCK: - buf += bsprintf(buf, "lock:"); - ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX); - return GA_FULL; - - case EA_KRT_FEATURES: - buf += bsprintf(buf, "features:"); - ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX); - return GA_FULL; - - default:; - int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET; - if (id > 0 && id < KRT_METRICS_MAX) - { - bsprintf(buf, "%s", krt_metrics_names[id]); - return GA_NAME; - } - - return GA_UNKNOWN; - } -} - - - -void -kif_sys_start(struct kif_proto *p UNUSED) -{ - nl_open(); - nl_open_async(); -} - -void -kif_sys_shutdown(struct kif_proto *p UNUSED) -{ -} - -int -kif_update_sysdep_addr(struct iface *i UNUSED) -{ - return 0; -} |