From 943478b00f585725c3e7406909ee867dcfac5f87 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Wed, 6 Sep 2017 17:38:48 +0200 Subject: Basic VRF support Add basic VRF (virtual routing and forwarding) support. Protocols can be associated with VRFs; such protocols will be restricted to interfaces assigned to the VRF (as reported by the Linux kernel) and will use sockets bound to the VRF. E.g., different multihop BGP instances can use different kernel routing tables to handle BGP TCP connections. The VRF support is preliminary; currently there are several limitations: - Recent Linux kernels (4.11) do not correctly handle sockets bound to interfaces that are part of a VRF, so most protocols other than multihop BGP do not work. This will be fixed by future kernel versions. - Neighbor cache ignores VRFs. Breaks config with the same prefix on local interfaces in different VRFs. Not much of a problem, as single-hop protocols do not work anyway. - Olock code ignores VRFs. Breaks config with multiple BGP peers with the same IP address in different VRFs. - Incoming BGP connections are not dispatched according to VRFs. Breaks config with multiple BGP peers with the same IP address in different VRFs. Perhaps we would need some kernel API to read the VRF of an incoming connection? Or probably use multiple listening sockets in the int-new branch. - We should handle master VRF interface up/down events and perhaps disable associated protocols when the VRF goes down. Or at least disable associated interfaces. - Also we should check if the master iface is really a VRF iface and not some other kind of master iface. - BFD session request dispatch should be aware of VRFs. - Perhaps the kernel protocol should read the default kernel table ID from the VRF iface so it is not necessary to configure it. - Perhaps we should have a per-VRF default table. 
--- proto/bgp/bgp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'proto/bgp') diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index f706e76e..913685a4 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -745,6 +745,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c s->daddr = p->cf->remote_ip; s->dport = p->cf->remote_port; s->iface = p->neigh ? p->neigh->iface : NULL; + s->vrf = p->p.vrf; s->ttl = p->cf->ttl_security ? 255 : hops; s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE; s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE; -- cgit v1.2.3 From 9f4908fe78cb3e5191bca721588ee1acb10876e3 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Tue, 12 Sep 2017 15:49:36 +0200 Subject: Nest: VRF support for neighbor cache and olock code Actually much simpler than expected. --- nest/locks.c | 1 + nest/locks.h | 1 + nest/neighbor.c | 8 +++++--- proto/bgp/bgp.c | 1 + proto/radv/radv.c | 1 - 5 files changed, 8 insertions(+), 4 deletions(-) (limited to 'proto/bgp') diff --git a/nest/locks.c b/nest/locks.c index 84b8b0ae..86c9ff14 100644 --- a/nest/locks.c +++ b/nest/locks.c @@ -45,6 +45,7 @@ olock_same(struct object_lock *x, struct object_lock *y) return x->type == y->type && x->iface == y->iface && + x->vrf == y->vrf && x->port == y->port && x->inst == y->inst && ipa_equal(x->addr, y->addr); diff --git a/nest/locks.h b/nest/locks.h index 3d58c8ed..37026c68 100644 --- a/nest/locks.h +++ b/nest/locks.h @@ -30,6 +30,7 @@ struct object_lock { uint port; /* ... port number */ uint inst; /* ... instance ID */ struct iface *iface; /* ... interface */ + struct iface *vrf; /* ... or VRF (if iface is unknown) */ void (*hook)(struct object_lock *); /* Called when the lock succeeds */ void *data; /* User data */ /* ... internal to lock manager, don't touch ... 
*/ diff --git a/nest/neighbor.c b/nest/neighbor.c index d974fa51..f8159d35 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -30,7 +30,8 @@ * when the protocol has explicitly requested it via the %NEF_STICKY * flag because it wishes to be notified when the node will again become * a neighbor. Such entries are enqueued in a special list which is walked - * whenever an interface changes its state to up. + * whenever an interface changes its state to up. Neighbor entry VRF + * association is implied by respective protocol. * * When a neighbor event occurs (a neighbor gets disconnected or a sticky * inactive neighbor becomes connected), the protocol hook neigh_notify() @@ -152,8 +153,9 @@ neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags) } else WALK_LIST(i, iface_list) - if ((scope = if_connected(a, i, &addr)) >= 0) - { + if ((!p->vrf || p->vrf == i->master) && + ((scope = if_connected(a, i, &addr)) >= 0)) + { ifa = i; break; } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 913685a4..8a6b2f02 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -1181,6 +1181,7 @@ bgp_start(struct proto *P) lock->addr = p->cf->remote_ip; lock->port = p->cf->remote_port; lock->iface = p->cf->iface; + lock->vrf = p->cf->iface ? 
NULL : p->p.vrf; lock->type = OBJLOCK_TCP; lock->hook = bgp_start_locked; lock->data = p; diff --git a/proto/radv/radv.c b/proto/radv/radv.c index 227c8ef6..c53a0a95 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -321,7 +321,6 @@ radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_conf ifa->timer = tm; struct object_lock *lock = olock_new(pool); - lock->addr = IPA_NONE; lock->type = OBJLOCK_IP; lock->port = ICMPV6_PROTO; lock->iface = iface; -- cgit v1.2.3 From f2dd602fef2ecf0a6598b817d71ce2ee8fadd5cc Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Tue, 12 Sep 2017 16:33:29 +0200 Subject: Backport some minor changes from int-new --- lib/birdlib.h | 3 +++ nest/protocol.h | 12 ++++++++++++ nest/route.h | 5 +++-- nest/rt-table.c | 16 ++++------------ proto/bgp/attrs.c | 2 -- 5 files changed, 22 insertions(+), 16 deletions(-) (limited to 'proto/bgp') diff --git a/lib/birdlib.h b/lib/birdlib.h index d21cdf1f..aaa7a0a3 100644 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@ -37,6 +37,9 @@ struct align_probe { char x; long int y; }; #define ABS(a) ((a)>=0 ? (a) : -(a)) #define DELTA(a,b) (((a)>=(b))?(a)-(b):(b)-(a)) #define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a))) +#define BYTES(n) ((((uint) (n)) + 7) / 8) +#define CALL(fn, args...) ({ if (fn) fn(args); }) +#define ADVANCE(w, r, l) ({ r -= l; w += l; }) /* Bitfield macros */ diff --git a/nest/protocol.h b/nest/protocol.h index 3c30a581..18dfbd6f 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -12,6 +12,7 @@ #include "lib/lists.h" #include "lib/resource.h" #include "lib/timer.h" +#include "nest/route.h" #include "conf/conf.h" struct iface; @@ -287,6 +288,17 @@ proto_get_router_id(struct proto_config *pc) return pc->router_id ? 
pc->router_id : pc->global->router_id; } +static inline struct ea_list * +rte_make_tmp_attrs(struct rte *rt, struct linpool *pool) +{ + struct ea_list *(*mta)(struct rte *rt, struct linpool *pool); + mta = rt->attrs->src->proto->make_tmp_attrs; + return mta ? mta(rt, pool) : NULL; +} + +/* Moved from route.h to avoid dependency conflicts */ +static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); } + extern list active_proto_list; /* diff --git a/nest/route.h b/nest/route.h index 383f4def..2e6ae5bf 100644 --- a/nest/route.h +++ b/nest/route.h @@ -12,10 +12,11 @@ #include "lib/lists.h" #include "lib/resource.h" #include "lib/timer.h" -#include "nest/protocol.h" +struct ea_list; struct protocol; struct proto; +struct rte_src; struct symbol; struct filter; struct cli; @@ -273,7 +274,7 @@ static inline net *net_get(rtable *tab, ip_addr addr, unsigned len) { return (ne rte *rte_find(net *net, struct rte_src *src); rte *rte_get_temp(struct rta *); void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src); -static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); } +/* rte_update() moved to protocol.h to avoid dependency conflicts */ int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter); rte *rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, struct ea_list **tmpa, linpool *pool, int silent); void rt_refresh_begin(rtable *t, struct announce_hook *ah); diff --git a/nest/rt-table.c b/nest/rt-table.c index c6e48c38..bcb48b53 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -60,14 +60,6 @@ static inline void rt_schedule_gc(rtable *tab); static inline void rt_schedule_prune(rtable *tab); -static inline struct ea_list * -make_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - struct ea_list *(*mta)(struct rte *rt, struct linpool *pool); - mta = 
rt->attrs->src->proto->make_tmp_attrs; - return mta ? mta(rt, pool) : NULL; -} - /* Like fib_route(), but skips empty net entries */ static net * net_route(rtable *tab, ip_addr a, int len) @@ -260,7 +252,7 @@ export_filter_(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa if (!tmpa) tmpa = &tmpb; - *tmpa = make_tmp_attrs(rt, pool); + *tmpa = rte_make_tmp_attrs(rt, pool); v = p->import_control ? p->import_control(p, &rt, tmpa, pool) : 0; if (v < 0) @@ -1223,7 +1215,7 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) } else { - tmpa = make_tmp_attrs(new, rte_update_pool); + tmpa = rte_make_tmp_attrs(new, rte_update_pool); if (filter && (filter != FILTER_REJECT)) { ea_list *old_tmpa = tmpa; @@ -1303,7 +1295,7 @@ rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter rte_update_lock(); /* Rest is stripped down export_filter() */ - ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool); + ea_list *tmpa = rte_make_tmp_attrs(rt, rte_update_pool); int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0; if (v == RIC_PROCESS) v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT); @@ -2470,7 +2462,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) ee = e; rte_update_lock(); /* We use the update buffer for filtering */ - tmpa = make_tmp_attrs(e, rte_update_pool); + tmpa = rte_make_tmp_attrs(e, rte_update_pool); /* Special case for merged export */ if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED)) diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index b9e2490d..73eb4040 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -469,8 +469,6 @@ bgp_get_attr_len(eattr *a) return len; } -#define ADVANCE(w, r, l) do { r -= l; w += l; } while (0) - /** * bgp_encode_attrs - encode BGP attributes * @p: BGP instance -- cgit v1.2.3