diff options
Diffstat (limited to 'sysdep/linux')
-rw-r--r-- | sysdep/linux/krt-sys.h | 32 | ||||
-rw-r--r-- | sysdep/linux/netlink.Y | 48 | ||||
-rw-r--r-- | sysdep/linux/netlink.c | 558 |
3 files changed, 317 insertions, 321 deletions
diff --git a/sysdep/linux/krt-sys.h b/sysdep/linux/krt-sys.h index 8897f889..aa90f6e4 100644 --- a/sysdep/linux/krt-sys.h +++ b/sysdep/linux/krt-sys.h @@ -34,38 +34,6 @@ static inline struct ifa * kif_get_primary_ip(struct iface *i UNUSED) { return N #define KRT_ALLOW_MERGE_PATHS 1 -#define EA_KRT_PREFSRC EA_CODE(PROTOCOL_KERNEL, 0x10) -#define EA_KRT_REALM EA_CODE(PROTOCOL_KERNEL, 0x11) -#define EA_KRT_SCOPE EA_CODE(PROTOCOL_KERNEL, 0x12) - - -#define KRT_METRICS_MAX 0x10 /* RTAX_QUICKACK+1 */ -#define KRT_METRICS_OFFSET 0x20 /* Offset of EA_KRT_* vs RTAX_* */ - -#define KRT_FEATURES_MAX 4 - -/* - * Following attributes are parts of RTA_METRICS kernel route attribute, their - * ids must be consistent with their RTAX_* constants (+ KRT_METRICS_OFFSET) - */ -#define EA_KRT_METRICS EA_CODE(PROTOCOL_KERNEL, 0x20) /* Dummy one */ -#define EA_KRT_LOCK EA_CODE(PROTOCOL_KERNEL, 0x21) -#define EA_KRT_MTU EA_CODE(PROTOCOL_KERNEL, 0x22) -#define EA_KRT_WINDOW EA_CODE(PROTOCOL_KERNEL, 0x23) -#define EA_KRT_RTT EA_CODE(PROTOCOL_KERNEL, 0x24) -#define EA_KRT_RTTVAR EA_CODE(PROTOCOL_KERNEL, 0x25) -#define EA_KRT_SSTRESH EA_CODE(PROTOCOL_KERNEL, 0x26) -#define EA_KRT_CWND EA_CODE(PROTOCOL_KERNEL, 0x27) -#define EA_KRT_ADVMSS EA_CODE(PROTOCOL_KERNEL, 0x28) -#define EA_KRT_REORDERING EA_CODE(PROTOCOL_KERNEL, 0x29) -#define EA_KRT_HOPLIMIT EA_CODE(PROTOCOL_KERNEL, 0x2a) -#define EA_KRT_INITCWND EA_CODE(PROTOCOL_KERNEL, 0x2b) -#define EA_KRT_FEATURES EA_CODE(PROTOCOL_KERNEL, 0x2c) -#define EA_KRT_RTO_MIN EA_CODE(PROTOCOL_KERNEL, 0x2d) -#define EA_KRT_INITRWND EA_CODE(PROTOCOL_KERNEL, 0x2e) -#define EA_KRT_QUICKACK EA_CODE(PROTOCOL_KERNEL, 0x2f) - - struct krt_params { u32 table_id; /* Kernel table ID we sync with */ u32 metric; /* Kernel metric used for all routes */ diff --git a/sysdep/linux/netlink.Y b/sysdep/linux/netlink.Y index 487ad1d8..7ba8c7c9 100644 --- a/sysdep/linux/netlink.Y +++ b/sysdep/linux/netlink.Y @@ -11,9 +11,6 @@ CF_HDR CF_DECLS CF_KEYWORDS(KERNEL, TABLE, METRIC, NETLINK, RX, BUFFER, - KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW, - KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING, - KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK, KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR, KRT_LOCK_SSTRESH, KRT_LOCK_CWND, KRT_LOCK_ADVMSS, KRT_LOCK_REORDERING, KRT_LOCK_HOPLIMIT, KRT_LOCK_RTO_MIN, KRT_FEATURE_ECN, KRT_FEATURE_ALLFRAG) @@ -28,39 +25,22 @@ kern_sys_item: | NETLINK RX BUFFER expr { THIS_KRT->sys.netlink_rx_buffer = $4; } ; -dynamic_attr: KRT_PREFSRC { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_KRT_PREFSRC); } ; -dynamic_attr: KRT_REALM { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_REALM); } ; -dynamic_attr: KRT_SCOPE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SCOPE); } ; - -dynamic_attr: KRT_MTU { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_MTU); } ; -dynamic_attr: KRT_WINDOW { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_WINDOW); } ; -dynamic_attr: KRT_RTT { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTT); } ; -dynamic_attr: KRT_RTTVAR { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTTVAR); } ; -dynamic_attr: KRT_SSTRESH { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SSTRESH); } ; -dynamic_attr: KRT_CWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_CWND); } ; -dynamic_attr: KRT_ADVMSS { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_ADVMSS); } ; -dynamic_attr: KRT_REORDERING { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_REORDERING); } ; -dynamic_attr: KRT_HOPLIMIT { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_HOPLIMIT); } ; -dynamic_attr: KRT_INITCWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_INITCWND); } ; -dynamic_attr: KRT_RTO_MIN { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTO_MIN); } ; -dynamic_attr: KRT_INITRWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_INITRWND); } ; -dynamic_attr: KRT_QUICKACK { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_QUICKACK); } ; - /* Bits of EA_KRT_LOCK, based on RTAX_* constants */ -dynamic_attr: KRT_LOCK_MTU { $$ = f_new_dynamic_attr_bit(2, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_WINDOW { $$ = f_new_dynamic_attr_bit(3, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_RTT { $$ = f_new_dynamic_attr_bit(4, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_RTTVAR { $$ = f_new_dynamic_attr_bit(5, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_SSTRESH { $$ = f_new_dynamic_attr_bit(6, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_CWND { $$ = f_new_dynamic_attr_bit(7, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_ADVMSS { $$ = f_new_dynamic_attr_bit(8, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_REORDERING { $$ = f_new_dynamic_attr_bit(9, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_HOPLIMIT { $$ = f_new_dynamic_attr_bit(10, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_RTO_MIN { $$ = f_new_dynamic_attr_bit(13, T_BOOL, EA_KRT_LOCK); } ; - -dynamic_attr: KRT_FEATURE_ECN { $$ = f_new_dynamic_attr_bit(0, T_BOOL, EA_KRT_FEATURES); } ; -dynamic_attr: KRT_FEATURE_ALLFRAG { $$ = f_new_dynamic_attr(3, T_BOOL, EA_KRT_FEATURES); } ; +attr_bit: KRT_LOCK_MTU { $$ = f_new_dynamic_attr_bit(2, "krt_lock"); } ; +attr_bit: KRT_LOCK_WINDOW { $$ = f_new_dynamic_attr_bit(3, "krt_lock"); } ; +attr_bit: KRT_LOCK_RTT { $$ = f_new_dynamic_attr_bit(4, "krt_lock"); } ; +attr_bit: KRT_LOCK_RTTVAR { $$ = f_new_dynamic_attr_bit(5, "krt_lock"); } ; +attr_bit: KRT_LOCK_SSTRESH { $$ = f_new_dynamic_attr_bit(6, "krt_lock"); } ; +attr_bit: KRT_LOCK_CWND { $$ = f_new_dynamic_attr_bit(7, "krt_lock"); } ; +attr_bit: KRT_LOCK_ADVMSS { $$ = f_new_dynamic_attr_bit(8, "krt_lock"); } ; +attr_bit: KRT_LOCK_REORDERING { $$ = f_new_dynamic_attr_bit(9, "krt_lock"); } ; +attr_bit: KRT_LOCK_HOPLIMIT { $$ = f_new_dynamic_attr_bit(10, "krt_lock"); } ; +attr_bit: KRT_LOCK_RTO_MIN { $$ = f_new_dynamic_attr_bit(13, "krt_lock"); } ; + +/* Bits of EA_KRT_FEATURES */ +attr_bit: KRT_FEATURE_ECN { $$ = f_new_dynamic_attr_bit(0, "krt_features"); } ; +attr_bit: KRT_FEATURE_ALLFRAG { $$ = f_new_dynamic_attr_bit(3, "krt_features"); } ; CF_CODE diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index bc65e0d2..099ae6e9 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -17,7 +17,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/alloca.h" @@ -26,6 +26,7 @@ #include "lib/socket.h" #include "lib/string.h" #include "lib/hash.h" +#include "lib/macro.h" #include "conf/conf.h" #include <asm/types.h> @@ -109,8 +110,8 @@ struct nl_parse_state int scan; int merge; - net *net; - rta *attrs; + net_addr *net; + ea_list *attrs; struct krt_proto *proto; s8 new; s8 krt_src; @@ -122,6 +123,101 @@ struct nl_parse_state }; /* + * Netlink eattr definitions + */ + +#define KRT_METRICS_MAX ARRAY_SIZE(ea_krt_metrics) +#define KRT_FEATURES_MAX 4 + +static void krt_bitfield_format(const eattr *e, byte *buf, uint buflen); + +static struct ea_class + ea_krt_prefsrc = { + .name = "krt_prefsrc", + .type = T_IP, + }, + ea_krt_realm = { + .name = "krt_realm", + .type = T_INT, + }, + ea_krt_scope = { + .name = "krt_scope", + .type = T_INT, + }; + +static struct ea_class ea_krt_metrics[] = { + [RTAX_LOCK] = { + .name = "krt_lock", + .type = T_INT, + .format = krt_bitfield_format, + }, + [RTAX_FEATURES] = { + .name = "krt_features", + .type = T_INT, + .format = krt_bitfield_format, + }, +#define KRT_METRIC_INT(_rtax, _name) [_rtax] = { .name = _name, .type = T_INT } + KRT_METRIC_INT(RTAX_MTU, "krt_mtu"), + KRT_METRIC_INT(RTAX_WINDOW, "krt_window"), + KRT_METRIC_INT(RTAX_RTT, "krt_rtt"), + KRT_METRIC_INT(RTAX_RTTVAR, "krt_rttvar"), + KRT_METRIC_INT(RTAX_SSTHRESH, "krt_sstresh"), + KRT_METRIC_INT(RTAX_CWND, "krt_cwnd"), + KRT_METRIC_INT(RTAX_ADVMSS, "krt_advmss"), + KRT_METRIC_INT(RTAX_REORDERING, "krt_reordering"), + KRT_METRIC_INT(RTAX_HOPLIMIT, "krt_hoplimit"), + KRT_METRIC_INT(RTAX_INITCWND, "krt_initcwnd"), + KRT_METRIC_INT(RTAX_RTO_MIN, "krt_rto_min"), + KRT_METRIC_INT(RTAX_INITRWND, "krt_initrwnd"), + KRT_METRIC_INT(RTAX_QUICKACK, "krt_quickack"), +#undef KRT_METRIC_INT +}; + +static const char *krt_metrics_names[KRT_METRICS_MAX] = { + NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss", + "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack" +}; + +static const char *krt_features_names[KRT_FEATURES_MAX] = { + "ecn", NULL, NULL, "allfrag" +}; + +static void +krt_bitfield_format(const eattr *a, byte *buf, uint buflen) +{ + if (a->id == ea_krt_metrics[RTAX_LOCK].id) + ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX); + else if (a->id == ea_krt_metrics[RTAX_FEATURES].id) + ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX); +} + +static void +nl_ea_register(void) +{ + EA_REGISTER_ALL( + &ea_krt_prefsrc, + &ea_krt_realm, + &ea_krt_scope + ); + + for (uint i = 0; i < KRT_METRICS_MAX; i++) + { + if (!ea_krt_metrics[i].name) + ea_krt_metrics[i] = (struct ea_class) { + .name = mb_sprintf(&root_pool, "krt_metric_%d", i), + .type = T_INT, + }; + + ea_register_init(&ea_krt_metrics[i]); + } + + for (uint i = 1; i < KRT_METRICS_MAX; i++) + ASSERT_DIE(ea_krt_metrics[i].id == ea_krt_metrics[0].id + i); +} + + + +/* * Synchronous Netlink interface */ @@ -747,12 +843,12 @@ nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUS } static void -nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, ea_list *eattrs) +nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop_adata *nhad, int af, ea_list *eattrs) { struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH); - eattr *flow = ea_find(eattrs, EA_KRT_REALM); + eattr *flow = ea_find(eattrs, &ea_krt_realm); - for (; nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize); @@ -776,31 +872,44 @@ nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, e nl_close_attr(h, a); } -static struct nexthop * +static struct nexthop_adata * nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src) { struct rtattr *a[BIRD_RTA_MAX]; - struct rtnexthop *nh = RTA_DATA(ra); - struct nexthop *rv, *first, **last; - unsigned len = RTA_PAYLOAD(ra); + struct rtnexthop *nh, *orig_nh = RTA_DATA(ra); + unsigned len, orig_len = RTA_PAYLOAD(ra); + uint cnt = 0; - first = NULL; - last = &first; + /* First count the nexthops */ + for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh)) + { + /* Use RTNH_OK(nh,len) ?? */ + if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) + goto err; + + if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) + ; + else + cnt++; + } + + struct nexthop_adata *nhad = lp_allocz(s->pool, cnt * NEXTHOP_MAX_SIZE + sizeof *nhad); + struct nexthop *rv = &nhad->nh; - while (len) + for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh)) { /* Use RTNH_OK(nh,len) ?? */ if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) goto err; if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) - goto next; + continue; - *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE); - last = &(rv->next); + *rv = (struct nexthop) { + .weight = nh->rtnh_hops, + .iface = if_find_by_index(nh->rtnh_ifindex), + }; - rv->weight = nh->rtnh_hops; - rv->iface = if_find_by_index(nh->rtnh_ifindex); if (!rv->iface) { log(L_ERR "KRT: Received route %N with unknown ifindex %u", n, nh->rtnh_ifindex); @@ -879,16 +988,14 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr } #endif - next: - len -= NLMSG_ALIGN(nh->rtnh_len); - nh = RTNH_NEXT(nh); + rv = NEXTHOP_NEXT(rv); } - /* Ensure nexthops are sorted to satisfy nest invariant */ - if (!nexthop_is_sorted(first)) - first = nexthop_sort(first); + /* Store final length */ + nhad->ad.length = (void *) rv - (void *) nhad->ad.data; - return first; + /* Ensure nexthops are sorted to satisfy nest invariant */ + return nexthop_is_sorted(nhad) ? nhad : nexthop_sort(nhad, s->pool); err: log(L_ERR "KRT: Received strange multipath route %N", n); @@ -1316,11 +1423,16 @@ HASH_DEFINE_REHASH_FN(RTH, struct krt_proto) int krt_capable(rte *e) { - rta *a = e->attrs; + eattr *ea = ea_find(e->attrs, &ea_gen_nexthop); + if (!ea) + return 0; + + struct nexthop_adata *nhad = (void *) ea->u.ptr; + if (NEXTHOP_IS_REACHABLE(nhad)) + return 1; - switch (a->dest) + switch (nhad->dest) { - case RTD_UNICAST: case RTD_BLACKHOLE: case RTD_UNREACHABLE: case RTD_PROHIBIT: @@ -1332,22 +1444,21 @@ krt_capable(rte *e) } static inline int -nh_bufsize(struct nexthop *nh) +nh_bufsize(struct nexthop_adata *nhad) { int rv = 0; - for (; nh != NULL; nh = nh->next) + NEXTHOP_WALK(nh, nhad) rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr))); return rv; } static int -nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) +nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nexthop_adata *nh) { eattr *ea; - net *net = e->net; - rta *a = e->attrs; - ea_list *eattrs = a->eattrs; - int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh)); + ea_list *eattrs = e->attrs; + + int bufsize = 128 + KRT_METRICS_MAX*8 + (nh ? nh_bufsize(nh) : 0); u32 priority = 0; struct { @@ -1359,7 +1470,7 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) int rsize = sizeof(*r) + bufsize; r = alloca(rsize); - DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op); + DBG("nl_send_route(%N,op=%x)\n", e->net, op); bzero(&r->h, sizeof(r->h)); bzero(&r->r, sizeof(r->r)); @@ -1368,7 +1479,7 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK; r->r.rtm_family = p->af; - r->r.rtm_dst_len = net_pxlen(net->n.addr); + r->r.rtm_dst_len = net_pxlen(e->net); r->r.rtm_protocol = RTPROT_BIRD; r->r.rtm_scope = RT_SCOPE_NOWHERE; #ifdef HAVE_MPLS_KERNEL @@ -1380,7 +1491,7 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) * 2) Never use RTA_PRIORITY */ - u32 label = net_mpls(net->n.addr); + u32 label = net_mpls(e->net); nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label); r->r.rtm_scope = RT_SCOPE_UNIVERSE; r->r.rtm_type = RTN_UNICAST; @@ -1388,12 +1499,12 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) else #endif { - nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr)); + nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(e->net)); /* Add source address for IPv6 SADR routes */ - if (net->n.addr->type == NET_IP6_SADR) + if (e->net->type == NET_IP6_SADR) { - net_addr_ip6_sadr *a = (void *) &net->n.addr; + net_addr_ip6_sadr *a = (void *) &e->net; nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix); r->r.rtm_src_len = a->src_pxlen; } @@ -1413,11 +1524,9 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) if (p->af == AF_MPLS) priority = 0; - else if (a->source == RTS_DUMMY) - priority = e->u.krt.metric; else if (KRT_CF->sys.metric) priority = KRT_CF->sys.metric; - else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC))) + else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, &ea_krt_metric))) priority = ea->u.data; if (priority) @@ -1430,15 +1539,15 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) /* Default scope is LINK for device routes, UNIVERSE otherwise */ if (p->af == AF_MPLS) r->r.rtm_scope = RT_SCOPE_UNIVERSE; - else if (ea = ea_find(eattrs, EA_KRT_SCOPE)) + else if (ea = ea_find(eattrs, &ea_krt_scope)) r->r.rtm_scope = ea->u.data; else - r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->nh.gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; - if (ea = ea_find(eattrs, EA_KRT_PREFSRC)) + if (ea = ea_find(eattrs, &ea_krt_prefsrc)) nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); - if (ea = ea_find(eattrs, EA_KRT_REALM)) + if (ea = ea_find(eattrs, &ea_krt_realm)) nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data); @@ -1446,9 +1555,9 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) metrics[0] = 0; struct ea_walk_state ews = { .eattrs = eattrs }; - while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX)) + while (ea = ea_walk(&ews, ea_krt_metrics[0].id, KRT_METRICS_MAX)) { - int id = ea->id - EA_KRT_METRICS; + int id = ea->id - ea_krt_metrics[0].id; metrics[0] |= 1 << id; metrics[id] = ea->u.data; } @@ -1462,14 +1571,14 @@ dest: { case RTD_UNICAST: r->r.rtm_type = RTN_UNICAST; - if (nh->next && !krt_ecmp6(p)) + if (!NEXTHOP_ONE(nh) && !krt_ecmp6(p)) nl_add_multipath(&r->h, rsize, nh, p->af, eattrs); else { - nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index); - nl_add_nexthop(&r->h, rsize, nh, p->af); + nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->nh.iface->index); + nl_add_nexthop(&r->h, rsize, &nh->nh, p->af); - if (nh->flags & RNF_ONLINK) + if (nh->nh.flags & RNF_ONLINK) r->r.rtm_flags |= RTNH_F_ONLINK; } break; @@ -1495,28 +1604,43 @@ dest: static inline int nl_add_rte(struct krt_proto *p, rte *e) { - rta *a = e->attrs; + ea_list *ea = e->attrs; int err = 0; - if (krt_ecmp6(p) && a->nh.next) - { - struct nexthop *nh = &(a->nh); - - err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh); - if (err < 0) - return err; + eattr *nhea = ea_find(ea, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; - for (nh = nh->next; nh; nh = nh->next) - err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh); + if (krt_ecmp6(p) && nhad && NEXTHOP_IS_REACHABLE(nhad) && !NEXTHOP_ONE(nhad)) + { + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + { + struct { + struct nexthop_adata nhad; + u32 labels[MPLS_MAX_LABEL_STACK]; + } nhx; + memcpy(&nhx.nhad.nh, nh, NEXTHOP_SIZE(nh)); + nhx.nhad.ad.length = (void *) NEXTHOP_NEXT(&nhx.nhad.nh) - (void *) nhx.nhad.ad.data; + + if (!cnt++) + { + err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, &nhx.nhad); + if (err < 0) + return err; + } + else + err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, &nhx.nhad); + } return err; } - return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh)); + return nl_send_route(p, e, NL_OP_ADD, + NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad); } static inline int -nl_delete_rte(struct krt_proto *p, rte *e) +nl_delete_rte(struct krt_proto *p, const rte *e) { int err = 0; @@ -1531,13 +1655,15 @@ nl_delete_rte(struct krt_proto *p, rte *e) static inline int nl_replace_rte(struct krt_proto *p, rte *e) { - rta *a = e->attrs; - return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh)); + eattr *nhea = ea_find(e->attrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + return nl_send_route(p, e, NL_OP_REPLACE, + NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad); } void -krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old) +krt_replace_rte(struct krt_proto *p, const net_addr *n UNUSED, rte *new, const rte *old) { int err = 0; @@ -1576,7 +1702,7 @@ krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old) } static int -nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family) +nl_mergable_route(struct nl_parse_state *s, const net_addr *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family) { /* Route merging is used for IPv6 scans */ if (!s->scan || (rtm_family != AF_INET6)) @@ -1596,18 +1722,25 @@ nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint static void nl_announce_route(struct nl_parse_state *s) { - rte *e = rte_get_temp(s->attrs); - e->net = s->net; - e->u.krt.src = s->krt_src; - e->u.krt.proto = s->krt_proto; - e->u.krt.seen = 0; - e->u.krt.best = 0; - e->u.krt.metric = s->krt_metric; + rte e0 = { + .attrs = s->attrs, + .net = s->net, + }; + + EA_LOCAL_LIST(2) ea = { + .l = { .count = 2, .next = e0.attrs }, + .a = { + EA_LITERAL_EMBEDDED(&ea_krt_source, 0, s->krt_proto), + EA_LITERAL_EMBEDDED(&ea_krt_metric, 0, s->krt_metric), + }, + }; + + e0.attrs = &ea.l; if (s->scan) - krt_got_route(s->proto, e); + krt_got_route(s->proto, &e0, s->krt_src); else - krt_got_route_async(s->proto, e, s->new); + krt_got_route_async(s->proto, &e0, s->new, s->krt_src); s->net = NULL; s->attrs = NULL; @@ -1757,92 +1890,121 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) krt_src = KRT_SRC_ALIEN; } - net_addr *n = &dst; + net_addr *net = &dst; if (p->p.net_type == NET_IP6_SADR) { - n = alloca(sizeof(net_addr_ip6_sadr)); - net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst), + net = alloca(sizeof(net_addr_ip6_sadr)); + net_fill_ip6_sadr(net, net6_prefix(&dst), net6_pxlen(&dst), net6_prefix(&src), net6_pxlen(&src)); } - net *net = net_get(p->p.main_channel->table, n); - if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family)) nl_announce_route(s); - rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE); - ra->src = p->p.main_source; - ra->source = RTS_INHERIT; - ra->scope = SCOPE_UNIVERSE; + ea_list *ra = NULL; + ea_set_attr_u32(&ra, &ea_gen_source, 0, RTS_INHERIT); if (a[RTA_FLOW]) s->rta_flow = rta_get_u32(a[RTA_FLOW]); else s->rta_flow = 0; + union { + struct { + struct adata ad; + struct nexthop nh; + u32 labels[MPLS_MAX_LABEL_STACK]; + }; + struct nexthop_adata nhad; + } nhad = {}; + switch (i->rtm_type) { case RTN_UNICAST: - ra->dest = RTD_UNICAST; - if (a[RTA_MULTIPATH]) { - struct nexthop *nh = nl_parse_multipath(s, p, n, a[RTA_MULTIPATH], i->rtm_family, krt_src); + struct nexthop_adata *nh = nl_parse_multipath(s, p, net, a[RTA_MULTIPATH], i->rtm_family, krt_src); if (!nh) SKIP("strange RTA_MULTIPATH\n"); - nexthop_link(ra, nh); + ea_set_attr(&ra, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &nh->ad)); break; } if ((i->rtm_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) SKIP("ignore RTNH_F_DEAD\n"); - ra->nh.iface = if_find_by_index(oif); - if (!ra->nh.iface) + nhad.nh.iface = if_find_by_index(oif); + if (!nhad.nh.iface) { - log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif); + log(L_ERR "KRT: Received route %N with unknown ifindex %u", net, oif); return; } if (a[RTA_GATEWAY]) - ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]); + nhad.nh.gw = rta_get_ipa(a[RTA_GATEWAY]); #ifdef HAVE_MPLS_KERNEL if (a[RTA_VIA]) - ra->nh.gw = rta_get_via(a[RTA_VIA]); + nhad.nh.gw = rta_get_via(a[RTA_VIA]); #endif - if (ipa_nonzero(ra->nh.gw)) + if (ipa_nonzero(nhad.nh.gw)) { /* Silently skip strange 6to4 routes */ const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96); - if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit)) + if ((i->rtm_family == AF_INET6) && ipa_in_netX(nhad.nh.gw, (net_addr *) &sit)) return; if (i->rtm_flags & RTNH_F_ONLINK) - ra->nh.flags |= RNF_ONLINK; + nhad.nh.flags |= RNF_ONLINK; neighbor *nbr; - nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface, - (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); + nbr = neigh_find(&p->p, nhad.nh.gw, nhad.nh.iface, + (nhad.nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); if (!nbr || (nbr->scope == SCOPE_HOST)) { - log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, - ra->nh.gw); + log(L_ERR "KRT: Received route %N with strange next-hop %I", net, + nhad.nh.gw); return; } } +#ifdef HAVE_MPLS_KERNEL + if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !a[RTA_MULTIPATH]) + nhad.nh.labels = rta_get_mpls(a[RTA_NEWDST], nhad.nh.label); + + if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !a[RTA_MULTIPATH]) + { + switch (rta_get_u16(a[RTA_ENCAP_TYPE])) + { + case LWTUNNEL_ENCAP_MPLS: + { + struct rtattr *enca[BIRD_RTA_MAX]; + nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); + nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); + nhad.nh.labels = rta_get_mpls(enca[RTA_DST], nhad.nh.label); + break; + } + default: + SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE])); + break; + } + } +#endif + + /* Finalize the nexthop */ + nhad.ad.length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data; break; case RTN_BLACKHOLE: - ra->dest = RTD_BLACKHOLE; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_BLACKHOLE); break; case RTN_UNREACHABLE: - ra->dest = RTD_UNREACHABLE; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_UNREACHABLE); break; case RTN_PROHIBIT: - ra->dest = RTD_PROHIBIT; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_PROHIBIT); break; /* FIXME: What about RTN_THROW? */ default: @@ -1850,105 +2012,36 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) return; } -#ifdef HAVE_MPLS_KERNEL - if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next) - ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label); - - if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next) - { - switch (rta_get_u16(a[RTA_ENCAP_TYPE])) - { - case LWTUNNEL_ENCAP_MPLS: - { - struct rtattr *enca[BIRD_RTA_MAX]; - nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); - nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); - ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label); - break; - } - default: - SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE])); - break; - } - } -#endif - if (i->rtm_scope != def_scope) - { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_SCOPE; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_INT; - ea->attrs[0].u.data = i->rtm_scope; - } + ea_set_attr(&ra, + EA_LITERAL_EMBEDDED(&ea_krt_scope, 0, i->rtm_scope)); if (a[RTA_PREFSRC]) - { - ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); - - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_PREFSRC; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_IP_ADDRESS; - - struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps)); - ad->length = sizeof(ps); - memcpy(ad->data, &ps, sizeof(ps)); - - ea->attrs[0].u.ptr = ad; - } + { + ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); + + ea_set_attr(&ra, + EA_LITERAL_STORE_ADATA(&ea_krt_prefsrc, 0, &ps, sizeof(ps))); + } /* Can be set per-route or per-nexthop */ if (s->rta_flow) - { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_REALM; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_INT; - ea->attrs[0].u.data = s->rta_flow; - } + ea_set_attr(&ra, + EA_LITERAL_EMBEDDED(&ea_krt_realm, 0, s->rta_flow)); if (a[RTA_METRICS]) { u32 metrics[KRT_METRICS_MAX]; - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr)); - int t, n = 0; - if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0) { - log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr); + log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net); return; } - for (t = 1; t < KRT_METRICS_MAX; t++) + for (uint t = 1; t < KRT_METRICS_MAX; t++) if (metrics[0] & (1 << t)) - { - ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t); - ea->attrs[n].flags = 0; - ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */ - ea->attrs[n].u.data = metrics[t]; - n++; - } - - if (n > 0) - { - ea->next = ra->eattrs; - ea->flags = EALF_SORTED; - ea->count = n; - ra->eattrs = ea; - } + ea_set_attr(&ra, + EA_LITERAL_EMBEDDED(&ea_krt_metrics[t], 0, metrics[t])); } /* @@ -1962,7 +2055,12 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (!s->net) { /* Store the new route */ - s->net = net; + s->net = lp_alloc(s->pool, net->length); + net_copy(s->net, net); + + ea_set_attr_data(&ra, &ea_gen_nexthop, 0, + nhad.ad.data, nhad.ad.length); + s->attrs = ra; s->proto = p; s->new = new; @@ -1974,20 +2072,18 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) else { /* Merge next hops with the stored route */ - rta *oa = s->attrs; - - struct nexthop *nhs = &oa->nh; - nexthop_insert(&nhs, &ra->nh); - - /* Perhaps new nexthop is inserted at the first position */ - if (nhs == &ra->nh) - { - /* Swap rtas */ - s->attrs = ra; - - /* Keep old eattrs */ - ra->eattrs = oa->eattrs; - } + eattr *nhea = ea_find(s->attrs, &ea_gen_nexthop); + struct nexthop_adata *nhad_old = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + + if (nhad_old) + ea_set_attr(&s->attrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, + &(nexthop_merge(nhad_old, &nhad.nhad, + KRT_CF->merge_paths, s->pool)->ad) + )); + else + ea_set_attr_data(&s->attrs, &ea_gen_nexthop, 0, + nhad.ad.data, nhad.ad.length); } } @@ -2195,6 +2291,8 @@ krt_sys_io_init(void) { nl_linpool = lp_new_default(krt_pool); HASH_INIT(nl_table_map, krt_pool, 6); + + nl_ea_register(); } int @@ -2248,56 +2346,6 @@ krt_sys_copy_config(struct krt_config *d, struct krt_config *s) d->sys.metric = s->sys.metric; } -static const char *krt_metrics_names[KRT_METRICS_MAX] = { - NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss", - "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack" -}; - -static const char *krt_features_names[KRT_FEATURES_MAX] = { - "ecn", NULL, NULL, "allfrag" -}; - -int -krt_sys_get_attr(const eattr *a, byte *buf, int buflen UNUSED) -{ - switch (a->id) - { - case EA_KRT_PREFSRC: - bsprintf(buf, "prefsrc"); - return GA_NAME; - - case EA_KRT_REALM: - bsprintf(buf, "realm"); - return GA_NAME; - - case EA_KRT_SCOPE: - bsprintf(buf, "scope"); - return GA_NAME; - - case EA_KRT_LOCK: - buf += bsprintf(buf, "lock:"); - ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX); - return GA_FULL; - - case EA_KRT_FEATURES: - buf += bsprintf(buf, "features:"); - ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX); - return GA_FULL; - - default:; - int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET; - if (id > 0 && id < KRT_METRICS_MAX) - { - bsprintf(buf, "%s", krt_metrics_names[id]); - return GA_NAME; - } - - return GA_UNKNOWN; - } -} - - - void kif_sys_start(struct kif_proto *p UNUSED) { |