summaryrefslogtreecommitdiff
path: root/sysdep/linux/netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'sysdep/linux/netlink.c')
-rw-r--r--sysdep/linux/netlink.c699
1 files changed, 433 insertions, 266 deletions
diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
index bff2d579..656202ac 100644
--- a/sysdep/linux/netlink.c
+++ b/sysdep/linux/netlink.c
@@ -17,7 +17,7 @@
#undef LOCAL_DEBUG
#include "nest/bird.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "lib/alloca.h"
@@ -26,6 +26,7 @@
#include "lib/socket.h"
#include "lib/string.h"
#include "lib/hash.h"
+#include "lib/macro.h"
#include "conf/conf.h"
#include <asm/types.h>
@@ -69,6 +70,10 @@
#define RTA_ENCAP 22
#endif
+#ifndef NETLINK_GET_STRICT_CHK
+#define NETLINK_GET_STRICT_CHK 12
+#endif
+
#define krt_ipv4(p) ((p)->af == AF_INET)
#define krt_ecmp6(p) ((p)->af == AF_INET6)
@@ -106,7 +111,7 @@ struct nl_parse_state
int merge;
net_addr *net;
- rta *attrs;
+ ea_list *attrs;
struct krt_proto *proto;
s8 new;
s8 krt_src;
@@ -118,6 +123,101 @@ struct nl_parse_state
};
/*
+ * Netlink eattr definitions
+ */
+
+#define KRT_METRICS_MAX ARRAY_SIZE(ea_krt_metrics)
+#define KRT_FEATURES_MAX 4
+
+static void krt_bitfield_format(const eattr *e, byte *buf, uint buflen);
+
+static struct ea_class
+ ea_krt_prefsrc = {
+ .name = "krt_prefsrc",
+ .type = T_IP,
+ },
+ ea_krt_realm = {
+ .name = "krt_realm",
+ .type = T_INT,
+ },
+ ea_krt_scope = {
+ .name = "krt_scope",
+ .type = T_INT,
+ };
+
+static struct ea_class ea_krt_metrics[] = {
+ [RTAX_LOCK] = {
+ .name = "krt_lock",
+ .type = T_INT,
+ .format = krt_bitfield_format,
+ },
+ [RTAX_FEATURES] = {
+ .name = "krt_features",
+ .type = T_INT,
+ .format = krt_bitfield_format,
+ },
+#define KRT_METRIC_INT(_rtax, _name) [_rtax] = { .name = _name, .type = T_INT }
+ KRT_METRIC_INT(RTAX_MTU, "krt_mtu"),
+ KRT_METRIC_INT(RTAX_WINDOW, "krt_window"),
+ KRT_METRIC_INT(RTAX_RTT, "krt_rtt"),
+ KRT_METRIC_INT(RTAX_RTTVAR, "krt_rttvar"),
+ KRT_METRIC_INT(RTAX_SSTHRESH, "krt_sstresh"),
+ KRT_METRIC_INT(RTAX_CWND, "krt_cwnd"),
+ KRT_METRIC_INT(RTAX_ADVMSS, "krt_advmss"),
+ KRT_METRIC_INT(RTAX_REORDERING, "krt_reordering"),
+ KRT_METRIC_INT(RTAX_HOPLIMIT, "krt_hoplimit"),
+ KRT_METRIC_INT(RTAX_INITCWND, "krt_initcwnd"),
+ KRT_METRIC_INT(RTAX_RTO_MIN, "krt_rto_min"),
+ KRT_METRIC_INT(RTAX_INITRWND, "krt_initrwnd"),
+ KRT_METRIC_INT(RTAX_QUICKACK, "krt_quickack"),
+#undef KRT_METRIC_INT
+};
+
+static const char *krt_metrics_names[KRT_METRICS_MAX] = {
+ NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
+ "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
+};
+
+static const char *krt_features_names[KRT_FEATURES_MAX] = {
+ "ecn", NULL, NULL, "allfrag"
+};
+
+static void
+krt_bitfield_format(const eattr *a, byte *buf, uint buflen)
+{
+ if (a->id == ea_krt_metrics[RTAX_LOCK].id)
+ ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
+ else if (a->id == ea_krt_metrics[RTAX_FEATURES].id)
+ ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
+}
+
+static void
+nl_ea_register(void)
+{
+ EA_REGISTER_ALL(
+ &ea_krt_prefsrc,
+ &ea_krt_realm,
+ &ea_krt_scope
+ );
+
+ for (uint i = 0; i < KRT_METRICS_MAX; i++)
+ {
+ if (!ea_krt_metrics[i].name)
+ ea_krt_metrics[i] = (struct ea_class) {
+ .name = mb_sprintf(&root_pool, "krt_metric_%d", i),
+ .type = T_INT,
+ };
+
+ ea_register_init(&ea_krt_metrics[i]);
+ }
+
+ for (uint i = 1; i < KRT_METRICS_MAX; i++)
+ ASSERT_DIE(ea_krt_metrics[i].id == ea_krt_metrics[0].id + i);
+}
+
+
+
+/*
* Synchronous Netlink interface
*/
@@ -130,7 +230,7 @@ struct nl_sock
uint last_size;
};
-#define NL_RX_SIZE 8192
+#define NL_RX_SIZE 32768
#define NL_OP_DELETE 0
#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
@@ -158,10 +258,46 @@ nl_open_sock(struct nl_sock *nl)
}
static void
+nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED)
+{
+ /*
+ * Strict checking is not necessary, it improves behavior on newer kernels.
+ * If it is not available (missing SOL_NETLINK compile-time, or ENOPROTOOPT
+ * run-time), we can just ignore it.
+ */
+#ifdef SOL_NETLINK
+ setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict));
+#endif
+}
+
+static void
+nl_set_rcvbuf(int fd, uint val)
+{
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)) < 0)
+ log(L_WARN "KRT: Cannot set netlink rx buffer size to %u: %m", val);
+}
+
+static uint
+nl_cfg_rx_buffer_size(struct config *cfg)
+{
+ uint bufsize = 0;
+
+ struct proto_config *pc;
+ WALK_LIST(pc, cfg->protos)
+ if ((pc->protocol == &proto_unix_kernel) && !pc->disabled)
+ bufsize = MAX(bufsize, ((struct krt_config *) pc)->sys.netlink_rx_buffer);
+
+ return bufsize;
+}
+
+
+static void
nl_open(void)
{
nl_open_sock(&nl_scan);
nl_open_sock(&nl_req);
+
+ nl_set_strict_dump(&nl_scan, 1);
}
static void
@@ -180,20 +316,60 @@ nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
}
static void
-nl_request_dump(int af, int cmd)
+nl_request_dump_link(void)
{
struct {
struct nlmsghdr nh;
- struct rtgenmsg g;
+ struct ifinfomsg ifi;
} req = {
- .nh.nlmsg_type = cmd,
- .nh.nlmsg_len = sizeof(req),
+ .nh.nlmsg_type = RTM_GETLINK,
+ .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
- .g.rtgen_family = af
+ .nh.nlmsg_seq = ++(nl_scan.seq),
+ .ifi.ifi_family = AF_UNSPEC,
};
- nl_send(&nl_scan, &req.nh);
+
+ send(nl_scan.fd, &req, sizeof(req), 0);
+ nl_scan.last_hdr = NULL;
}
+static void
+nl_request_dump_addr(int af)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifaddrmsg ifa;
+ } req = {
+ .nh.nlmsg_type = RTM_GETADDR,
+ .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+ .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
+ .nh.nlmsg_seq = ++(nl_scan.seq),
+ .ifa.ifa_family = af,
+ };
+
+ send(nl_scan.fd, &req, sizeof(req), 0);
+ nl_scan.last_hdr = NULL;
+}
+
+static void
+nl_request_dump_route(int af)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct rtmsg rtm;
+ } req = {
+ .nh.nlmsg_type = RTM_GETROUTE,
+ .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
+ .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
+ .nh.nlmsg_seq = ++(nl_scan.seq),
+ .rtm.rtm_family = af,
+ };
+
+ send(nl_scan.fd, &req, sizeof(req), 0);
+ nl_scan.last_hdr = NULL;
+}
+
+
static struct nlmsghdr *
nl_get_reply(struct nl_sock *nl)
{
@@ -651,12 +827,12 @@ nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUS
}
static void
-nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, ea_list *eattrs)
+nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop_adata *nhad, int af, ea_list *eattrs)
{
struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
- eattr *flow = ea_find(eattrs, EA_KRT_REALM);
+ eattr *flow = ea_find(eattrs, &ea_krt_realm);
- for (; nh; nh = nh->next)
+ NEXTHOP_WALK(nh, nhad)
{
struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
@@ -680,33 +856,49 @@ nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, e
nl_close_attr(h, a);
}
-static struct nexthop *
-nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af)
+static struct nexthop_adata *
+nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src)
{
struct rtattr *a[BIRD_RTA_MAX];
- struct rtnexthop *nh = RTA_DATA(ra);
- struct nexthop *rv, *first, **last;
- unsigned len = RTA_PAYLOAD(ra);
+ struct rtnexthop *nh, *orig_nh = RTA_DATA(ra);
+ unsigned len, orig_len = RTA_PAYLOAD(ra);
+ uint cnt = 0;
+
+ /* First count the nexthops */
+ for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh))
+ {
+ /* Use RTNH_OK(nh,len) ?? */
+ if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
+ goto err;
- first = NULL;
- last = &first;
+ if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
+ ;
+ else
+ cnt++;
+ }
+
+ struct nexthop_adata *nhad = lp_allocz(s->pool, cnt * NEXTHOP_MAX_SIZE + sizeof *nhad);
+ struct nexthop *rv = &nhad->nh;
- while (len)
+ for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh))
{
/* Use RTNH_OK(nh,len) ?? */
if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
- return NULL;
+ goto err;
- if (nh->rtnh_flags & RTNH_F_DEAD)
- goto next;
+ if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
+ continue;
- *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
- last = &(rv->next);
+ *rv = (struct nexthop) {
+ .weight = nh->rtnh_hops,
+ .iface = if_find_by_index(nh->rtnh_ifindex),
+ };
- rv->weight = nh->rtnh_hops;
- rv->iface = if_find_by_index(nh->rtnh_ifindex);
if (!rv->iface)
- return NULL;
+ {
+ log(L_ERR "KRT: Received route %N with unknown ifindex %u", n, nh->rtnh_ifindex);
+ return NULL;
+ }
/* Nonexistent RTNH_PAYLOAD ?? */
nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
@@ -714,18 +906,18 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr
{
case AF_INET:
if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
- return NULL;
+ goto err;
break;
case AF_INET6:
if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
- return NULL;
+ goto err;
break;
#ifdef HAVE_MPLS_KERNEL
case AF_MPLS:
if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want_mpls, a, sizeof(a)))
- return NULL;
+ goto err;
if (a[RTA_NEWDST])
rv->labels = rta_get_mpls(a[RTA_NEWDST], rv->label);
@@ -734,7 +926,7 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr
#endif
default:
- return NULL;
+ goto err;
}
if (a[RTA_GATEWAY])
@@ -757,14 +949,19 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr
nbr = neigh_find(&p->p, rv->gw, rv->iface,
(rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
if (!nbr || (nbr->scope == SCOPE_HOST))
- return NULL;
+ {
+ log(L_ERR "KRT: Received route %N with strange next-hop %I", n, rv->gw);
+ return NULL;
+ }
}
#ifdef HAVE_MPLS_KERNEL
if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE])
{
- if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
- log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
+ if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS)
+ {
+ log(L_WARN "KRT: Received route %N with unknown encapsulation method %d",
+ n, rta_get_u16(a[RTA_ENCAP_TYPE]));
return NULL;
}
@@ -775,16 +972,18 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr
}
#endif
- next:
- len -= NLMSG_ALIGN(nh->rtnh_len);
- nh = RTNH_NEXT(nh);
+ rv = NEXTHOP_NEXT(rv);
}
+ /* Store final length */
+ nhad->ad.length = (void *) rv - (void *) nhad->ad.data;
+
/* Ensure nexthops are sorted to satisfy nest invariant */
- if (!nexthop_is_sorted(first))
- first = nexthop_sort(first);
+ return nexthop_is_sorted(nhad) ? nhad : nexthop_sort(nhad, s->pool);
- return first;
+err:
+ log(L_ERR "KRT: Received strange multipath route %N", n);
+ return NULL;
}
static void
@@ -1139,7 +1338,7 @@ kif_do_scan(struct kif_proto *p UNUSED)
if_start_update();
- nl_request_dump(AF_UNSPEC, RTM_GETLINK);
+ nl_request_dump_link();
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
nl_parse_link(h, 1);
@@ -1166,14 +1365,14 @@ kif_do_scan(struct kif_proto *p UNUSED)
}
}
- nl_request_dump(AF_INET, RTM_GETADDR);
+ nl_request_dump_addr(AF_INET);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
nl_parse_addr(h, 1);
else
log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
- nl_request_dump(AF_INET6, RTM_GETADDR);
+ nl_request_dump_addr(AF_INET6);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
nl_parse_addr(h, 1);
@@ -1208,11 +1407,16 @@ HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
int
krt_capable(rte *e)
{
- rta *a = e->attrs;
+ eattr *ea = ea_find(e->attrs, &ea_gen_nexthop);
+ if (!ea)
+ return 0;
+
+ struct nexthop_adata *nhad = (void *) ea->u.ptr;
+ if (NEXTHOP_IS_REACHABLE(nhad))
+ return 1;
- switch (a->dest)
+ switch (nhad->dest)
{
- case RTD_UNICAST:
case RTD_BLACKHOLE:
case RTD_UNREACHABLE:
case RTD_PROHIBIT:
@@ -1224,21 +1428,21 @@ krt_capable(rte *e)
}
static inline int
-nh_bufsize(struct nexthop *nh)
+nh_bufsize(struct nexthop_adata *nhad)
{
int rv = 0;
- for (; nh != NULL; nh = nh->next)
+ NEXTHOP_WALK(nh, nhad)
rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
return rv;
}
static int
-nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nexthop *nh)
+nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nexthop_adata *nh)
{
eattr *ea;
- rta *a = e->attrs;
- ea_list *eattrs = a->eattrs;
- int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
+ ea_list *eattrs = e->attrs;
+
+ int bufsize = 128 + KRT_METRICS_MAX*8 + (nh ? nh_bufsize(nh) : 0);
u32 priority = 0;
struct {
@@ -1306,7 +1510,7 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho
priority = 0;
else if (KRT_CF->sys.metric)
priority = KRT_CF->sys.metric;
- else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
+ else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, &ea_krt_metric)))
priority = ea->u.data;
if (priority)
@@ -1319,15 +1523,15 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho
/* Default scope is LINK for device routes, UNIVERSE otherwise */
if (p->af == AF_MPLS)
r->r.rtm_scope = RT_SCOPE_UNIVERSE;
- else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
+ else if (ea = ea_find(eattrs, &ea_krt_scope))
r->r.rtm_scope = ea->u.data;
else
- r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+ r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->nh.gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
- if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
+ if (ea = ea_find(eattrs, &ea_krt_prefsrc))
nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
- if (ea = ea_find(eattrs, EA_KRT_REALM))
+ if (ea = ea_find(eattrs, &ea_krt_realm))
nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
@@ -1335,9 +1539,9 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho
metrics[0] = 0;
struct ea_walk_state ews = { .eattrs = eattrs };
- while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
+ while (ea = ea_walk(&ews, ea_krt_metrics[0].id, KRT_METRICS_MAX))
{
- int id = ea->id - EA_KRT_METRICS;
+ int id = ea->id - ea_krt_metrics[0].id;
metrics[0] |= 1 << id;
metrics[id] = ea->u.data;
}
@@ -1351,14 +1555,14 @@ dest:
{
case RTD_UNICAST:
r->r.rtm_type = RTN_UNICAST;
- if (nh->next && !krt_ecmp6(p))
+ if (!NEXTHOP_ONE(nh) && !krt_ecmp6(p))
nl_add_multipath(&r->h, rsize, nh, p->af, eattrs);
else
{
- nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
- nl_add_nexthop(&r->h, rsize, nh, p->af);
+ nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->nh.iface->index);
+ nl_add_nexthop(&r->h, rsize, &nh->nh, p->af);
- if (nh->flags & RNF_ONLINK)
+ if (nh->nh.flags & RNF_ONLINK)
r->r.rtm_flags |= RTNH_F_ONLINK;
}
break;
@@ -1384,24 +1588,39 @@ dest:
static inline int
nl_add_rte(struct krt_proto *p, rte *e)
{
- rta *a = e->attrs;
+ ea_list *ea = e->attrs;
int err = 0;
- if (krt_ecmp6(p) && a->nh.next)
- {
- struct nexthop *nh = &(a->nh);
+ eattr *nhea = ea_find(ea, &ea_gen_nexthop);
+ struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
- err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh);
- if (err < 0)
- return err;
-
- for (nh = nh->next; nh; nh = nh->next)
- err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh);
+ if (krt_ecmp6(p) && nhad && NEXTHOP_IS_REACHABLE(nhad) && !NEXTHOP_ONE(nhad))
+ {
+ uint cnt = 0;
+ NEXTHOP_WALK(nh, nhad)
+ {
+ struct {
+ struct nexthop_adata nhad;
+ u32 labels[MPLS_MAX_LABEL_STACK];
+ } nhx;
+ memcpy(&nhx.nhad.nh, nh, NEXTHOP_SIZE(nh));
+ nhx.nhad.ad.length = (void *) NEXTHOP_NEXT(&nhx.nhad.nh) - (void *) nhx.nhad.ad.data;
+
+ if (!cnt++)
+ {
+ err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, &nhx.nhad);
+ if (err < 0)
+ return err;
+ }
+ else
+ err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, &nhx.nhad);
+ }
return err;
}
- return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh));
+ return nl_send_route(p, e, NL_OP_ADD,
+ NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad);
}
static inline int
@@ -1420,8 +1639,10 @@ nl_delete_rte(struct krt_proto *p, const rte *e)
static inline int
nl_replace_rte(struct krt_proto *p, rte *e)
{
- rta *a = e->attrs;
- return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh));
+ eattr *nhea = ea_find(e->attrs, &ea_gen_nexthop);
+ struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
+ return nl_send_route(p, e, NL_OP_REPLACE,
+ NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad);
}
@@ -1490,21 +1711,16 @@ nl_announce_route(struct nl_parse_state *s)
.net = s->net,
};
- ea_list *ea = alloca(sizeof(ea_list) + 2 * sizeof(eattr));
- *ea = (ea_list) { .count = 2, .next = e0.attrs->eattrs };
- e0.attrs->eattrs = ea;
-
- ea->attrs[0] = (eattr) {
- .id = EA_KRT_SOURCE,
- .type = EAF_TYPE_INT,
- .u.data = s->krt_proto,
- };
- ea->attrs[1] = (eattr) {
- .id = EA_KRT_METRIC,
- .type = EAF_TYPE_INT,
- .u.data = s->krt_metric,
+ EA_LOCAL_LIST(2) ea = {
+ .l = { .count = 2, .next = e0.attrs },
+ .a = {
+ EA_LITERAL_EMBEDDED(&ea_krt_source, 0, s->krt_proto),
+ EA_LITERAL_EMBEDDED(&ea_krt_metric, 0, s->krt_metric),
+ },
};
+ e0.attrs = &ea.l;
+
if (s->scan)
krt_got_route(s->proto, &e0, s->krt_src);
else
@@ -1532,7 +1748,8 @@ nl_parse_end(struct nl_parse_state *s)
}
-#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
+#define SKIP0(ARG, ...) do { DBG("KRT: Ignoring route - " ARG, ##__VA_ARGS__); return; } while(0)
+#define SKIP(ARG, ...) do { DBG("KRT: Ignoring route %N - " ARG, &dst, ##__VA_ARGS__); return; } while(0)
static void
nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
@@ -1585,10 +1802,10 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
return;
if (!a[RTA_DST])
- SKIP("MPLS route without RTA_DST");
+ SKIP0("MPLS route without RTA_DST\n");
if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
- SKIP("MPLS route with multi-label RTA_DST");
+ SKIP0("MPLS route with multi-label RTA_DST\n");
net_fill_mpls(&dst, rta_mpls_stack[0]);
break;
@@ -1606,6 +1823,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
else
table_id = i->rtm_table;
+ if (i->rtm_flags & RTM_F_CLONED)
+ SKIP("cloned\n");
+
/* Do we know this table? */
p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
if (!p)
@@ -1665,80 +1885,110 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family))
nl_announce_route(s);
- rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
- ra->source = RTS_INHERIT;
- ra->scope = SCOPE_UNIVERSE;
+ ea_list *ra = NULL;
+ ea_set_attr_u32(&ra, &ea_gen_source, 0, RTS_INHERIT);
if (a[RTA_FLOW])
s->rta_flow = rta_get_u32(a[RTA_FLOW]);
else
s->rta_flow = 0;
+ union {
+ struct {
+ struct adata ad;
+ struct nexthop nh;
+ u32 labels[MPLS_MAX_LABEL_STACK];
+ };
+ struct nexthop_adata nhad;
+ } nhad = {};
+
switch (i->rtm_type)
{
case RTN_UNICAST:
- ra->dest = RTD_UNICAST;
-
if (a[RTA_MULTIPATH])
{
- struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family);
+ struct nexthop_adata *nh = nl_parse_multipath(s, p, net, a[RTA_MULTIPATH], i->rtm_family, krt_src);
if (!nh)
- {
- log(L_ERR "KRT: Received strange multipath route %N", net);
- return;
- }
+ SKIP("strange RTA_MULTIPATH\n");
- nexthop_link(ra, nh);
+ ea_set_attr(&ra, EA_LITERAL_DIRECT_ADATA(
+ &ea_gen_nexthop, 0, &nh->ad));
break;
}
- if (i->rtm_flags & RTNH_F_DEAD)
- return;
+ if ((i->rtm_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
+ SKIP("ignore RTNH_F_DEAD\n");
- ra->nh.iface = if_find_by_index(oif);
- if (!ra->nh.iface)
+ nhad.nh.iface = if_find_by_index(oif);
+ if (!nhad.nh.iface)
{
log(L_ERR "KRT: Received route %N with unknown ifindex %u", net, oif);
return;
}
if (a[RTA_GATEWAY])
- ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
+ nhad.nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
#ifdef HAVE_MPLS_KERNEL
if (a[RTA_VIA])
- ra->nh.gw = rta_get_via(a[RTA_VIA]);
+ nhad.nh.gw = rta_get_via(a[RTA_VIA]);
#endif
- if (ipa_nonzero(ra->nh.gw))
+ if (ipa_nonzero(nhad.nh.gw))
{
/* Silently skip strange 6to4 routes */
const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
- if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
+ if ((i->rtm_family == AF_INET6) && ipa_in_netX(nhad.nh.gw, (net_addr *) &sit))
return;
if (i->rtm_flags & RTNH_F_ONLINK)
- ra->nh.flags |= RNF_ONLINK;
+ nhad.nh.flags |= RNF_ONLINK;
neighbor *nbr;
- nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
- (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
+ nbr = neigh_find(&p->p, nhad.nh.gw, nhad.nh.iface,
+ (nhad.nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
if (!nbr || (nbr->scope == SCOPE_HOST))
{
- log(L_ERR "KRT: Received route %N with strange next-hop %I", net, ra->nh.gw);
+ log(L_ERR "KRT: Received route %N with strange next-hop %I", net,
+ nhad.nh.gw);
return;
}
}
+#ifdef HAVE_MPLS_KERNEL
+ if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !a[RTA_MULTIPATH])
+ nhad.nh.labels = rta_get_mpls(a[RTA_NEWDST], nhad.nh.label);
+
+ if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !a[RTA_MULTIPATH])
+ {
+ switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
+ {
+ case LWTUNNEL_ENCAP_MPLS:
+ {
+ struct rtattr *enca[BIRD_RTA_MAX];
+ nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
+ nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
+ nhad.nh.labels = rta_get_mpls(enca[RTA_DST], nhad.nh.label);
+ break;
+ }
+ default:
+ SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
+ break;
+ }
+ }
+#endif
+
+ /* Finalize the nexthop */
+ nhad.ad.length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data;
break;
case RTN_BLACKHOLE:
- ra->dest = RTD_BLACKHOLE;
+ nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_BLACKHOLE);
break;
case RTN_UNREACHABLE:
- ra->dest = RTD_UNREACHABLE;
+ nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_UNREACHABLE);
break;
case RTN_PROHIBIT:
- ra->dest = RTD_PROHIBIT;
+ nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_PROHIBIT);
break;
/* FIXME: What about RTN_THROW? */
default:
@@ -1746,105 +1996,36 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
return;
}
-#ifdef HAVE_MPLS_KERNEL
- if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
- ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
-
- if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
- {
- switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
- {
- case LWTUNNEL_ENCAP_MPLS:
- {
- struct rtattr *enca[BIRD_RTA_MAX];
- nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
- nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
- ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
- break;
- }
- default:
- SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
- break;
- }
- }
-#endif
-
if (i->rtm_scope != def_scope)
- {
- ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
- ea->next = ra->eattrs;
- ra->eattrs = ea;
- ea->flags = EALF_SORTED;
- ea->count = 1;
- ea->attrs[0].id = EA_KRT_SCOPE;
- ea->attrs[0].flags = 0;
- ea->attrs[0].type = EAF_TYPE_INT;
- ea->attrs[0].u.data = i->rtm_scope;
- }
+ ea_set_attr(&ra,
+ EA_LITERAL_EMBEDDED(&ea_krt_scope, 0, i->rtm_scope));
if (a[RTA_PREFSRC])
- {
- ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
-
- ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
- ea->next = ra->eattrs;
- ra->eattrs = ea;
- ea->flags = EALF_SORTED;
- ea->count = 1;
- ea->attrs[0].id = EA_KRT_PREFSRC;
- ea->attrs[0].flags = 0;
- ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
-
- struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
- ad->length = sizeof(ps);
- memcpy(ad->data, &ps, sizeof(ps));
-
- ea->attrs[0].u.ptr = ad;
- }
+ {
+ ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
+
+ ea_set_attr(&ra,
+ EA_LITERAL_STORE_ADATA(&ea_krt_prefsrc, 0, &ps, sizeof(ps)));
+ }
/* Can be set per-route or per-nexthop */
if (s->rta_flow)
- {
- ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
- ea->next = ra->eattrs;
- ra->eattrs = ea;
- ea->flags = EALF_SORTED;
- ea->count = 1;
- ea->attrs[0].id = EA_KRT_REALM;
- ea->attrs[0].flags = 0;
- ea->attrs[0].type = EAF_TYPE_INT;
- ea->attrs[0].u.data = s->rta_flow;
- }
+ ea_set_attr(&ra,
+ EA_LITERAL_EMBEDDED(&ea_krt_realm, 0, s->rta_flow));
if (a[RTA_METRICS])
{
u32 metrics[KRT_METRICS_MAX];
- ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
- int t, n = 0;
-
if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
{
log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net);
return;
}
- for (t = 1; t < KRT_METRICS_MAX; t++)
+ for (uint t = 1; t < KRT_METRICS_MAX; t++)
if (metrics[0] & (1 << t))
- {
- ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
- ea->attrs[n].flags = 0;
- ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
- ea->attrs[n].u.data = metrics[t];
- n++;
- }
-
- if (n > 0)
- {
- ea->next = ra->eattrs;
- ea->flags = EALF_SORTED;
- ea->count = n;
- ra->eattrs = ea;
- }
+ ea_set_attr(&ra,
+ EA_LITERAL_EMBEDDED(&ea_krt_metrics[t], 0, metrics[t]));
}
/*
@@ -1861,6 +2042,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
s->net = lp_alloc(s->pool, net->length);
net_copy(s->net, net);
+ ea_set_attr_data(&ra, &ea_gen_nexthop, 0,
+ nhad.ad.data, nhad.ad.length);
+
s->attrs = ra;
s->proto = p;
s->new = new;
@@ -1872,20 +2056,18 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
else
{
/* Merge next hops with the stored route */
- rta *oa = s->attrs;
-
- struct nexthop *nhs = &oa->nh;
- nexthop_insert(&nhs, &ra->nh);
-
- /* Perhaps new nexthop is inserted at the first position */
- if (nhs == &ra->nh)
- {
- /* Swap rtas */
- s->attrs = ra;
-
- /* Keep old eattrs */
- ra->eattrs = oa->eattrs;
- }
+ eattr *nhea = ea_find(s->attrs, &ea_gen_nexthop);
+ struct nexthop_adata *nhad_old = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
+
+ if (nhad_old)
+ ea_set_attr(&s->attrs,
+ EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0,
+ &(nexthop_merge(nhad_old, &nhad.nhad,
+ KRT_CF->merge_paths, s->pool)->ad)
+ ));
+ else
+ ea_set_attr_data(&s->attrs, &ea_gen_nexthop, 0,
+ nhad.ad.data, nhad.ad.length);
}
}
@@ -1896,7 +2078,7 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL
struct nl_parse_state s;
nl_parse_begin(&s, 1);
- nl_request_dump(AF_UNSPEC, RTM_GETROUTE);
+ nl_request_dump_route(AF_UNSPEC);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
nl_parse_route(&s, h);
@@ -1911,6 +2093,8 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL
static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
static byte *nl_async_rx_buffer; /* Receive buffer */
+static uint nl_async_bufsize; /* Kernel rx buffer size for the netlink socket */
+static struct config *nl_last_config; /* For tracking changes to nl_async_bufsize */
static void
nl_async_msg(struct nlmsghdr *h)
@@ -2046,6 +2230,32 @@ nl_open_async(void)
bug("Netlink: sk_open failed");
}
+static void
+nl_update_async_bufsize(void)
+{
+ /* No async socket */
+ if (!nl_async_sk)
+ return;
+
+ /* Already reconfigured */
+ if (nl_last_config == config)
+ return;
+
+ /* Update netlink buffer size */
+ uint bufsize = nl_cfg_rx_buffer_size(config);
+ if (bufsize && (bufsize != nl_async_bufsize))
+ {
+ /* Log message for reconfigurations only */
+ if (nl_last_config)
+ log(L_INFO "KRT: Changing netlink rx buffer size to %u", bufsize);
+
+ nl_set_rcvbuf(nl_async_sk->fd, bufsize);
+ nl_async_bufsize = bufsize;
+ }
+
+ nl_last_config = config;
+}
+
/*
* Interface to the UNIX krt module
@@ -2056,6 +2266,8 @@ krt_sys_io_init(void)
{
nl_linpool = lp_new_default(krt_pool);
HASH_INIT(nl_table_map, krt_pool, 6);
+
+ nl_ea_register();
}
int
@@ -2074,6 +2286,7 @@ krt_sys_start(struct krt_proto *p)
nl_open();
nl_open_async();
+ nl_update_async_bufsize();
return 1;
}
@@ -2081,12 +2294,16 @@ krt_sys_start(struct krt_proto *p)
void
krt_sys_shutdown(struct krt_proto *p)
{
+ nl_update_async_bufsize();
+
HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
}
int
krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
{
+ nl_update_async_bufsize();
+
return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
}
@@ -2104,56 +2321,6 @@ krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
d->sys.metric = s->sys.metric;
}
-static const char *krt_metrics_names[KRT_METRICS_MAX] = {
- NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
- "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
-};
-
-static const char *krt_features_names[KRT_FEATURES_MAX] = {
- "ecn", NULL, NULL, "allfrag"
-};
-
-int
-krt_sys_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
-{
- switch (a->id)
- {
- case EA_KRT_PREFSRC:
- bsprintf(buf, "prefsrc");
- return GA_NAME;
-
- case EA_KRT_REALM:
- bsprintf(buf, "realm");
- return GA_NAME;
-
- case EA_KRT_SCOPE:
- bsprintf(buf, "scope");
- return GA_NAME;
-
- case EA_KRT_LOCK:
- buf += bsprintf(buf, "lock:");
- ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
- return GA_FULL;
-
- case EA_KRT_FEATURES:
- buf += bsprintf(buf, "features:");
- ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
- return GA_FULL;
-
- default:;
- int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
- if (id > 0 && id < KRT_METRICS_MAX)
- {
- bsprintf(buf, "%s", krt_metrics_names[id]);
- return GA_NAME;
- }
-
- return GA_UNKNOWN;
- }
-}
-
-
-
void
kif_sys_start(struct kif_proto *p UNUSED)
{