diff options
Diffstat (limited to 'sysdep/linux/netlink.c')
-rw-r--r-- | sysdep/linux/netlink.c | 699 |
1 files changed, 433 insertions, 266 deletions
diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index bff2d579..656202ac 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -17,7 +17,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/alloca.h" @@ -26,6 +26,7 @@ #include "lib/socket.h" #include "lib/string.h" #include "lib/hash.h" +#include "lib/macro.h" #include "conf/conf.h" #include <asm/types.h> @@ -69,6 +70,10 @@ #define RTA_ENCAP 22 #endif +#ifndef NETLINK_GET_STRICT_CHK +#define NETLINK_GET_STRICT_CHK 12 +#endif + #define krt_ipv4(p) ((p)->af == AF_INET) #define krt_ecmp6(p) ((p)->af == AF_INET6) @@ -106,7 +111,7 @@ struct nl_parse_state int merge; net_addr *net; - rta *attrs; + ea_list *attrs; struct krt_proto *proto; s8 new; s8 krt_src; @@ -118,6 +123,101 @@ struct nl_parse_state }; /* + * Netlink eattr definitions + */ + +#define KRT_METRICS_MAX ARRAY_SIZE(ea_krt_metrics) +#define KRT_FEATURES_MAX 4 + +static void krt_bitfield_format(const eattr *e, byte *buf, uint buflen); + +static struct ea_class + ea_krt_prefsrc = { + .name = "krt_prefsrc", + .type = T_IP, + }, + ea_krt_realm = { + .name = "krt_realm", + .type = T_INT, + }, + ea_krt_scope = { + .name = "krt_scope", + .type = T_INT, + }; + +static struct ea_class ea_krt_metrics[] = { + [RTAX_LOCK] = { + .name = "krt_lock", + .type = T_INT, + .format = krt_bitfield_format, + }, + [RTAX_FEATURES] = { + .name = "krt_features", + .type = T_INT, + .format = krt_bitfield_format, + }, +#define KRT_METRIC_INT(_rtax, _name) [_rtax] = { .name = _name, .type = T_INT } + KRT_METRIC_INT(RTAX_MTU, "krt_mtu"), + KRT_METRIC_INT(RTAX_WINDOW, "krt_window"), + KRT_METRIC_INT(RTAX_RTT, "krt_rtt"), + KRT_METRIC_INT(RTAX_RTTVAR, "krt_rttvar"), + KRT_METRIC_INT(RTAX_SSTHRESH, "krt_sstresh"), + KRT_METRIC_INT(RTAX_CWND, "krt_cwnd"), + KRT_METRIC_INT(RTAX_ADVMSS, "krt_advmss"), + KRT_METRIC_INT(RTAX_REORDERING, "krt_reordering"), + KRT_METRIC_INT(RTAX_HOPLIMIT, "krt_hoplimit"), + KRT_METRIC_INT(RTAX_INITCWND, "krt_initcwnd"), + KRT_METRIC_INT(RTAX_RTO_MIN, "krt_rto_min"), + KRT_METRIC_INT(RTAX_INITRWND, "krt_initrwnd"), + KRT_METRIC_INT(RTAX_QUICKACK, "krt_quickack"), +#undef KRT_METRIC_INT +}; + +static const char *krt_metrics_names[KRT_METRICS_MAX] = { + NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss", + "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack" +}; + +static const char *krt_features_names[KRT_FEATURES_MAX] = { + "ecn", NULL, NULL, "allfrag" +}; + +static void +krt_bitfield_format(const eattr *a, byte *buf, uint buflen) +{ + if (a->id == ea_krt_metrics[RTAX_LOCK].id) + ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX); + else if (a->id == ea_krt_metrics[RTAX_FEATURES].id) + ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX); +} + +static void +nl_ea_register(void) +{ + EA_REGISTER_ALL( + &ea_krt_prefsrc, + &ea_krt_realm, + &ea_krt_scope + ); + + for (uint i = 0; i < KRT_METRICS_MAX; i++) + { + if (!ea_krt_metrics[i].name) + ea_krt_metrics[i] = (struct ea_class) { + .name = mb_sprintf(&root_pool, "krt_metric_%d", i), + .type = T_INT, + }; + + ea_register_init(&ea_krt_metrics[i]); + } + + for (uint i = 1; i < KRT_METRICS_MAX; i++) + ASSERT_DIE(ea_krt_metrics[i].id == ea_krt_metrics[0].id + i); +} + + + +/* * Synchronous Netlink interface */ @@ -130,7 +230,7 @@ struct nl_sock uint last_size; }; -#define NL_RX_SIZE 8192 +#define NL_RX_SIZE 32768 #define NL_OP_DELETE 0 #define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL) @@ -158,10 +258,46 @@ nl_open_sock(struct nl_sock *nl) } static void +nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED) +{ + /* + * Strict checking is not necessary, it improves behavior on newer kernels. + * If it is not available (missing SOL_NETLINK compile-time, or ENOPROTOOPT + * run-time), we can just ignore it. + */ +#ifdef SOL_NETLINK + setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict)); +#endif +} + +static void +nl_set_rcvbuf(int fd, uint val) +{ + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)) < 0) + log(L_WARN "KRT: Cannot set netlink rx buffer size to %u: %m", val); +} + +static uint +nl_cfg_rx_buffer_size(struct config *cfg) +{ + uint bufsize = 0; + + struct proto_config *pc; + WALK_LIST(pc, cfg->protos) + if ((pc->protocol == &proto_unix_kernel) && !pc->disabled) + bufsize = MAX(bufsize, ((struct krt_config *) pc)->sys.netlink_rx_buffer); + + return bufsize; +} + + +static void nl_open(void) { nl_open_sock(&nl_scan); nl_open_sock(&nl_req); + + nl_set_strict_dump(&nl_scan, 1); } static void @@ -180,20 +316,60 @@ nl_send(struct nl_sock *nl, struct nlmsghdr *nh) } static void -nl_request_dump(int af, int cmd) +nl_request_dump_link(void) { struct { struct nlmsghdr nh; - struct rtgenmsg g; + struct ifinfomsg ifi; } req = { - .nh.nlmsg_type = cmd, - .nh.nlmsg_len = sizeof(req), + .nh.nlmsg_type = RTM_GETLINK, + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, - .g.rtgen_family = af + .nh.nlmsg_seq = ++(nl_scan.seq), + .ifi.ifi_family = AF_UNSPEC, }; - nl_send(&nl_scan, &req.nh); + + send(nl_scan.fd, &req, sizeof(req), 0); + nl_scan.last_hdr = NULL; } +static void +nl_request_dump_addr(int af) +{ + struct { + struct nlmsghdr nh; + struct ifaddrmsg ifa; + } req = { + .nh.nlmsg_type = RTM_GETADDR, + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), + .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, + .nh.nlmsg_seq = ++(nl_scan.seq), + .ifa.ifa_family = af, + }; + + send(nl_scan.fd, &req, sizeof(req), 0); + nl_scan.last_hdr = NULL; +} + +static void +nl_request_dump_route(int af) +{ + struct { + struct nlmsghdr nh; + struct rtmsg rtm; + } req = { + .nh.nlmsg_type = RTM_GETROUTE, + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)), + .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, + .nh.nlmsg_seq = ++(nl_scan.seq), + .rtm.rtm_family = af, + }; + + send(nl_scan.fd, &req, sizeof(req), 0); + nl_scan.last_hdr = NULL; +} + + static struct nlmsghdr * nl_get_reply(struct nl_sock *nl) { @@ -651,12 +827,12 @@ nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUS } static void -nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, ea_list *eattrs) +nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop_adata *nhad, int af, ea_list *eattrs) { struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH); - eattr *flow = ea_find(eattrs, EA_KRT_REALM); + eattr *flow = ea_find(eattrs, &ea_krt_realm); - for (; nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize); @@ -680,33 +856,49 @@ nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, e nl_close_attr(h, a); } -static struct nexthop * -nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af) +static struct nexthop_adata * +nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src) { struct rtattr *a[BIRD_RTA_MAX]; - struct rtnexthop *nh = RTA_DATA(ra); - struct nexthop *rv, *first, **last; - unsigned len = RTA_PAYLOAD(ra); + struct rtnexthop *nh, *orig_nh = RTA_DATA(ra); + unsigned len, orig_len = RTA_PAYLOAD(ra); + uint cnt = 0; + + /* First count the nexthops */ + for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh)) + { + /* Use RTNH_OK(nh,len) ?? */ + if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) + goto err; - first = NULL; - last = &first; + if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) + ; + else + cnt++; + } + + struct nexthop_adata *nhad = lp_allocz(s->pool, cnt * NEXTHOP_MAX_SIZE + sizeof *nhad); + struct nexthop *rv = &nhad->nh; - while (len) + for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh)) { /* Use RTNH_OK(nh,len) ?? */ if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) - return NULL; + goto err; - if (nh->rtnh_flags & RTNH_F_DEAD) - goto next; + if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) + continue; - *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE); - last = &(rv->next); + *rv = (struct nexthop) { + .weight = nh->rtnh_hops, + .iface = if_find_by_index(nh->rtnh_ifindex), + }; - rv->weight = nh->rtnh_hops; - rv->iface = if_find_by_index(nh->rtnh_ifindex); if (!rv->iface) - return NULL; + { + log(L_ERR "KRT: Received route %N with unknown ifindex %u", n, nh->rtnh_ifindex); + return NULL; + } /* Nonexistent RTNH_PAYLOAD ?? */ nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0); @@ -714,18 +906,18 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr { case AF_INET: if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a))) - return NULL; + goto err; break; case AF_INET6: if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a))) - return NULL; + goto err; break; #ifdef HAVE_MPLS_KERNEL case AF_MPLS: if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want_mpls, a, sizeof(a))) - return NULL; + goto err; if (a[RTA_NEWDST]) rv->labels = rta_get_mpls(a[RTA_NEWDST], rv->label); @@ -734,7 +926,7 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr #endif default: - return NULL; + goto err; } if (a[RTA_GATEWAY]) @@ -757,14 +949,19 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr nbr = neigh_find(&p->p, rv->gw, rv->iface, (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0); if (!nbr || (nbr->scope == SCOPE_HOST)) - return NULL; + { + log(L_ERR "KRT: Received route %N with strange next-hop %I", n, rv->gw); + return NULL; + } } #ifdef HAVE_MPLS_KERNEL if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE]) { - if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) { - log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE])); + if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) + { + log(L_WARN "KRT: Received route %N with unknown encapsulation method %d", + n, rta_get_u16(a[RTA_ENCAP_TYPE])); return NULL; } @@ -775,16 +972,18 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr } #endif - next: - len -= NLMSG_ALIGN(nh->rtnh_len); - nh = RTNH_NEXT(nh); + rv = NEXTHOP_NEXT(rv); } + /* Store final length */ + nhad->ad.length = (void *) rv - (void *) nhad->ad.data; + /* Ensure nexthops are sorted to satisfy nest invariant */ - if (!nexthop_is_sorted(first)) - first = nexthop_sort(first); + return nexthop_is_sorted(nhad) ? nhad : nexthop_sort(nhad, s->pool); - return first; +err: + log(L_ERR "KRT: Received strange multipath route %N", n); + return NULL; } static void @@ -1139,7 +1338,7 @@ kif_do_scan(struct kif_proto *p UNUSED) if_start_update(); - nl_request_dump(AF_UNSPEC, RTM_GETLINK); + nl_request_dump_link(); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK) nl_parse_link(h, 1); @@ -1166,14 +1365,14 @@ kif_do_scan(struct kif_proto *p UNUSED) } } - nl_request_dump(AF_INET, RTM_GETADDR); + nl_request_dump_addr(AF_INET); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) nl_parse_addr(h, 1); else log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type); - nl_request_dump(AF_INET6, RTM_GETADDR); + nl_request_dump_addr(AF_INET6); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) nl_parse_addr(h, 1); @@ -1208,11 +1407,16 @@ HASH_DEFINE_REHASH_FN(RTH, struct krt_proto) int krt_capable(rte *e) { - rta *a = e->attrs; + eattr *ea = ea_find(e->attrs, &ea_gen_nexthop); + if (!ea) + return 0; + + struct nexthop_adata *nhad = (void *) ea->u.ptr; + if (NEXTHOP_IS_REACHABLE(nhad)) + return 1; - switch (a->dest) + switch (nhad->dest) { - case RTD_UNICAST: case RTD_BLACKHOLE: case RTD_UNREACHABLE: case RTD_PROHIBIT: @@ -1224,21 +1428,21 @@ krt_capable(rte *e) } static inline int -nh_bufsize(struct nexthop *nh) +nh_bufsize(struct nexthop_adata *nhad) { int rv = 0; - for (; nh != NULL; nh = nh->next) + NEXTHOP_WALK(nh, nhad) rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr))); return rv; } static int -nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nexthop *nh) +nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nexthop_adata *nh) { eattr *ea; - rta *a = e->attrs; - ea_list *eattrs = a->eattrs; - int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh)); + ea_list *eattrs = e->attrs; + + int bufsize = 128 + KRT_METRICS_MAX*8 + (nh ? nh_bufsize(nh) : 0); u32 priority = 0; struct { @@ -1306,7 +1510,7 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho priority = 0; else if (KRT_CF->sys.metric) priority = KRT_CF->sys.metric; - else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC))) + else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, &ea_krt_metric))) priority = ea->u.data; if (priority) @@ -1319,15 +1523,15 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho /* Default scope is LINK for device routes, UNIVERSE otherwise */ if (p->af == AF_MPLS) r->r.rtm_scope = RT_SCOPE_UNIVERSE; - else if (ea = ea_find(eattrs, EA_KRT_SCOPE)) + else if (ea = ea_find(eattrs, &ea_krt_scope)) r->r.rtm_scope = ea->u.data; else - r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->nh.gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; - if (ea = ea_find(eattrs, EA_KRT_PREFSRC)) + if (ea = ea_find(eattrs, &ea_krt_prefsrc)) nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); - if (ea = ea_find(eattrs, EA_KRT_REALM)) + if (ea = ea_find(eattrs, &ea_krt_realm)) nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data); @@ -1335,9 +1539,9 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho metrics[0] = 0; struct ea_walk_state ews = { .eattrs = eattrs }; - while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX)) + while (ea = ea_walk(&ews, ea_krt_metrics[0].id, KRT_METRICS_MAX)) { - int id = ea->id - EA_KRT_METRICS; + int id = ea->id - ea_krt_metrics[0].id; metrics[0] |= 1 << id; metrics[id] = ea->u.data; } @@ -1351,14 +1555,14 @@ dest: { case RTD_UNICAST: r->r.rtm_type = RTN_UNICAST; - if (nh->next && !krt_ecmp6(p)) + if (!NEXTHOP_ONE(nh) && !krt_ecmp6(p)) nl_add_multipath(&r->h, rsize, nh, p->af, eattrs); else { - nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index); - nl_add_nexthop(&r->h, rsize, nh, p->af); + nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->nh.iface->index); + nl_add_nexthop(&r->h, rsize, &nh->nh, p->af); - if (nh->flags & RNF_ONLINK) + if (nh->nh.flags & RNF_ONLINK) r->r.rtm_flags |= RTNH_F_ONLINK; } break; @@ -1384,24 +1588,39 @@ dest: static inline int nl_add_rte(struct krt_proto *p, rte *e) { - rta *a = e->attrs; + ea_list *ea = e->attrs; int err = 0; - if (krt_ecmp6(p) && a->nh.next) - { - struct nexthop *nh = &(a->nh); + eattr *nhea = ea_find(ea, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; - err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh); - if (err < 0) - return err; - - for (nh = nh->next; nh; nh = nh->next) - err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh); + if (krt_ecmp6(p) && nhad && NEXTHOP_IS_REACHABLE(nhad) && !NEXTHOP_ONE(nhad)) + { + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + { + struct { + struct nexthop_adata nhad; + u32 labels[MPLS_MAX_LABEL_STACK]; + } nhx; + memcpy(&nhx.nhad.nh, nh, NEXTHOP_SIZE(nh)); + nhx.nhad.ad.length = (void *) NEXTHOP_NEXT(&nhx.nhad.nh) - (void *) nhx.nhad.ad.data; + + if (!cnt++) + { + err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, &nhx.nhad); + if (err < 0) + return err; + } + else + err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, &nhx.nhad); + } return err; } - return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh)); + return nl_send_route(p, e, NL_OP_ADD, + NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad); } static inline int @@ -1420,8 +1639,10 @@ nl_delete_rte(struct krt_proto *p, const rte *e) static inline int nl_replace_rte(struct krt_proto *p, rte *e) { - rta *a = e->attrs; - return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh)); + eattr *nhea = ea_find(e->attrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + return nl_send_route(p, e, NL_OP_REPLACE, + NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad); } @@ -1490,21 +1711,16 @@ nl_announce_route(struct nl_parse_state *s) .net = s->net, }; - ea_list *ea = alloca(sizeof(ea_list) + 2 * sizeof(eattr)); - *ea = (ea_list) { .count = 2, .next = e0.attrs->eattrs }; - e0.attrs->eattrs = ea; - - ea->attrs[0] = (eattr) { - .id = EA_KRT_SOURCE, - .type = EAF_TYPE_INT, - .u.data = s->krt_proto, - }; - ea->attrs[1] = (eattr) { - .id = EA_KRT_METRIC, - .type = EAF_TYPE_INT, - .u.data = s->krt_metric, + EA_LOCAL_LIST(2) ea = { + .l = { .count = 2, .next = e0.attrs }, + .a = { + EA_LITERAL_EMBEDDED(&ea_krt_source, 0, s->krt_proto), + EA_LITERAL_EMBEDDED(&ea_krt_metric, 0, s->krt_metric), + }, }; + e0.attrs = &ea.l; + if (s->scan) krt_got_route(s->proto, &e0, s->krt_src); else @@ -1532,7 +1748,8 @@ nl_parse_end(struct nl_parse_state *s) } -#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0) +#define SKIP0(ARG, ...) do { DBG("KRT: Ignoring route - " ARG, ##__VA_ARGS__); return; } while(0) +#define SKIP(ARG, ...) do { DBG("KRT: Ignoring route %N - " ARG, &dst, ##__VA_ARGS__); return; } while(0) static void nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) @@ -1585,10 +1802,10 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) return; if (!a[RTA_DST]) - SKIP("MPLS route without RTA_DST"); + SKIP0("MPLS route without RTA_DST\n"); if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1) - SKIP("MPLS route with multi-label RTA_DST"); + SKIP0("MPLS route with multi-label RTA_DST\n"); net_fill_mpls(&dst, rta_mpls_stack[0]); break; @@ -1606,6 +1823,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) else table_id = i->rtm_table; + if (i->rtm_flags & RTM_F_CLONED) + SKIP("cloned\n"); + /* Do we know this table? */ p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id); if (!p) @@ -1665,80 +1885,110 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family)) nl_announce_route(s); - rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE); - ra->source = RTS_INHERIT; - ra->scope = SCOPE_UNIVERSE; + ea_list *ra = NULL; + ea_set_attr_u32(&ra, &ea_gen_source, 0, RTS_INHERIT); if (a[RTA_FLOW]) s->rta_flow = rta_get_u32(a[RTA_FLOW]); else s->rta_flow = 0; + union { + struct { + struct adata ad; + struct nexthop nh; + u32 labels[MPLS_MAX_LABEL_STACK]; + }; + struct nexthop_adata nhad; + } nhad = {}; + switch (i->rtm_type) { case RTN_UNICAST: - ra->dest = RTD_UNICAST; - if (a[RTA_MULTIPATH]) { - struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family); + struct nexthop_adata *nh = nl_parse_multipath(s, p, net, a[RTA_MULTIPATH], i->rtm_family, krt_src); if (!nh) - { - log(L_ERR "KRT: Received strange multipath route %N", net); - return; - } + SKIP("strange RTA_MULTIPATH\n"); - nexthop_link(ra, nh); + ea_set_attr(&ra, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &nh->ad)); break; } - if (i->rtm_flags & RTNH_F_DEAD) - return; + if ((i->rtm_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) + SKIP("ignore RTNH_F_DEAD\n"); - ra->nh.iface = if_find_by_index(oif); - if (!ra->nh.iface) + nhad.nh.iface = if_find_by_index(oif); + if (!nhad.nh.iface) { log(L_ERR "KRT: Received route %N with unknown ifindex %u", net, oif); return; } if (a[RTA_GATEWAY]) - ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]); + nhad.nh.gw = rta_get_ipa(a[RTA_GATEWAY]); #ifdef HAVE_MPLS_KERNEL if (a[RTA_VIA]) - ra->nh.gw = rta_get_via(a[RTA_VIA]); + nhad.nh.gw = rta_get_via(a[RTA_VIA]); #endif - if (ipa_nonzero(ra->nh.gw)) + if (ipa_nonzero(nhad.nh.gw)) { /* Silently skip strange 6to4 routes */ const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96); - if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit)) + if ((i->rtm_family == AF_INET6) && ipa_in_netX(nhad.nh.gw, (net_addr *) &sit)) return; if (i->rtm_flags & RTNH_F_ONLINK) - ra->nh.flags |= RNF_ONLINK; + nhad.nh.flags |= RNF_ONLINK; neighbor *nbr; - nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface, - (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); + nbr = neigh_find(&p->p, nhad.nh.gw, nhad.nh.iface, + (nhad.nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); if (!nbr || (nbr->scope == SCOPE_HOST)) { - log(L_ERR "KRT: Received route %N with strange next-hop %I", net, ra->nh.gw); + log(L_ERR "KRT: Received route %N with strange next-hop %I", net, + nhad.nh.gw); return; } } +#ifdef HAVE_MPLS_KERNEL + if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !a[RTA_MULTIPATH]) + nhad.nh.labels = rta_get_mpls(a[RTA_NEWDST], nhad.nh.label); + + if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !a[RTA_MULTIPATH]) + { + switch (rta_get_u16(a[RTA_ENCAP_TYPE])) + { + case LWTUNNEL_ENCAP_MPLS: + { + struct rtattr *enca[BIRD_RTA_MAX]; + nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); + nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); + nhad.nh.labels = rta_get_mpls(enca[RTA_DST], nhad.nh.label); + break; + } + default: + SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE])); + break; + } + } +#endif + + /* Finalize the nexthop */ + nhad.ad.length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data; break; case RTN_BLACKHOLE: - ra->dest = RTD_BLACKHOLE; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_BLACKHOLE); break; case RTN_UNREACHABLE: - ra->dest = RTD_UNREACHABLE; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_UNREACHABLE); break; case RTN_PROHIBIT: - ra->dest = RTD_PROHIBIT; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_PROHIBIT); break; /* FIXME: What about RTN_THROW? */ default: @@ -1746,105 +1996,36 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) return; } -#ifdef HAVE_MPLS_KERNEL - if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next) - ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label); - - if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next) - { - switch (rta_get_u16(a[RTA_ENCAP_TYPE])) - { - case LWTUNNEL_ENCAP_MPLS: - { - struct rtattr *enca[BIRD_RTA_MAX]; - nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); - nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); - ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label); - break; - } - default: - SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE])); - break; - } - } -#endif - if (i->rtm_scope != def_scope) - { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_SCOPE; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_INT; - ea->attrs[0].u.data = i->rtm_scope; - } + ea_set_attr(&ra, + EA_LITERAL_EMBEDDED(&ea_krt_scope, 0, i->rtm_scope)); if (a[RTA_PREFSRC]) - { - ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); - - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_PREFSRC; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_IP_ADDRESS; - - struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps)); - ad->length = sizeof(ps); - memcpy(ad->data, &ps, sizeof(ps)); - - ea->attrs[0].u.ptr = ad; - } + { + ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); + + ea_set_attr(&ra, + EA_LITERAL_STORE_ADATA(&ea_krt_prefsrc, 0, &ps, sizeof(ps))); + } /* Can be set per-route or per-nexthop */ if (s->rta_flow) - { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_REALM; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_INT; - ea->attrs[0].u.data = s->rta_flow; - } + ea_set_attr(&ra, + EA_LITERAL_EMBEDDED(&ea_krt_realm, 0, s->rta_flow)); if (a[RTA_METRICS]) { u32 metrics[KRT_METRICS_MAX]; - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr)); - int t, n = 0; - if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0) { log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net); return; } - for (t = 1; t < KRT_METRICS_MAX; t++) + for (uint t = 1; t < KRT_METRICS_MAX; t++) if (metrics[0] & (1 << t)) - { - ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t); - ea->attrs[n].flags = 0; - ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */ - ea->attrs[n].u.data = metrics[t]; - n++; - } - - if (n > 0) - { - ea->next = ra->eattrs; - ea->flags = EALF_SORTED; - ea->count = n; - ra->eattrs = ea; - } + ea_set_attr(&ra, + EA_LITERAL_EMBEDDED(&ea_krt_metrics[t], 0, metrics[t])); } /* @@ -1861,6 +2042,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) s->net = lp_alloc(s->pool, net->length); net_copy(s->net, net); + ea_set_attr_data(&ra, &ea_gen_nexthop, 0, + nhad.ad.data, nhad.ad.length); + s->attrs = ra; s->proto = p; s->new = new; @@ -1872,20 +2056,18 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) else { /* Merge next hops with the stored route */ - rta *oa = s->attrs; - - struct nexthop *nhs = &oa->nh; - nexthop_insert(&nhs, &ra->nh); - - /* Perhaps new nexthop is inserted at the first position */ - if (nhs == &ra->nh) - { - /* Swap rtas */ - s->attrs = ra; - - /* Keep old eattrs */ - ra->eattrs = oa->eattrs; - } + eattr *nhea = ea_find(s->attrs, &ea_gen_nexthop); + struct nexthop_adata *nhad_old = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + + if (nhad_old) + ea_set_attr(&s->attrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, + &(nexthop_merge(nhad_old, &nhad.nhad, + KRT_CF->merge_paths, s->pool)->ad) + )); + else + ea_set_attr_data(&s->attrs, &ea_gen_nexthop, 0, + nhad.ad.data, nhad.ad.length); } } @@ -1896,7 +2078,7 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL struct nl_parse_state s; nl_parse_begin(&s, 1); - nl_request_dump(AF_UNSPEC, RTM_GETROUTE); + nl_request_dump_route(AF_UNSPEC); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) nl_parse_route(&s, h); @@ -1911,6 +2093,8 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */ static byte *nl_async_rx_buffer; /* Receive buffer */ +static uint nl_async_bufsize; /* Kernel rx buffer size for the netlink socket */ +static struct config *nl_last_config; /* For tracking changes to nl_async_bufsize */ static void nl_async_msg(struct nlmsghdr *h) @@ -2046,6 +2230,32 @@ nl_open_async(void) bug("Netlink: sk_open failed"); } +static void +nl_update_async_bufsize(void) +{ + /* No async socket */ + if (!nl_async_sk) + return; + + /* Already reconfigured */ + if (nl_last_config == config) + return; + + /* Update netlink buffer size */ + uint bufsize = nl_cfg_rx_buffer_size(config); + if (bufsize && (bufsize != nl_async_bufsize)) + { + /* Log message for reconfigurations only */ + if (nl_last_config) + log(L_INFO "KRT: Changing netlink rx buffer size to %u", bufsize); + + nl_set_rcvbuf(nl_async_sk->fd, bufsize); + nl_async_bufsize = bufsize; + } + + nl_last_config = config; +} + /* * Interface to the UNIX krt module @@ -2056,6 +2266,8 @@ krt_sys_io_init(void) { nl_linpool = lp_new_default(krt_pool); HASH_INIT(nl_table_map, krt_pool, 6); + + nl_ea_register(); } int @@ -2074,6 +2286,7 @@ krt_sys_start(struct krt_proto *p) nl_open(); nl_open_async(); + nl_update_async_bufsize(); return 1; } @@ -2081,12 +2294,16 @@ krt_sys_start(struct krt_proto *p) void krt_sys_shutdown(struct krt_proto *p) { + nl_update_async_bufsize(); + HASH_REMOVE2(nl_table_map, RTH, krt_pool, p); } int krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o) { + nl_update_async_bufsize(); + return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric); } @@ -2104,56 +2321,6 @@ krt_sys_copy_config(struct krt_config *d, struct krt_config *s) d->sys.metric = s->sys.metric; } -static const char *krt_metrics_names[KRT_METRICS_MAX] = { - NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss", - "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack" -}; - -static const char *krt_features_names[KRT_FEATURES_MAX] = { - "ecn", NULL, NULL, "allfrag" -}; - -int -krt_sys_get_attr(const eattr *a, byte *buf, int buflen UNUSED) -{ - switch (a->id) - { - case EA_KRT_PREFSRC: - bsprintf(buf, "prefsrc"); - return GA_NAME; - - case EA_KRT_REALM: - bsprintf(buf, "realm"); - return GA_NAME; - - case EA_KRT_SCOPE: - bsprintf(buf, "scope"); - return GA_NAME; - - case EA_KRT_LOCK: - buf += bsprintf(buf, "lock:"); - ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX); - return GA_FULL; - - case EA_KRT_FEATURES: - buf += bsprintf(buf, "features:"); - ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX); - return GA_FULL; - - default:; - int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET; - if (id > 0 && id < KRT_METRICS_MAX) - { - bsprintf(buf, "%s", krt_metrics_names[id]); - return GA_NAME; - } - - return GA_UNKNOWN; - } -} - - - void kif_sys_start(struct kif_proto *p UNUSED) { |