From 05d8c3699d51866c68747167556e7c1f06390afe Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Tue, 24 Jan 2023 11:01:34 +0100 Subject: Object locks use events Instead of calling custom hooks from object locks, we use standard event sending mechanism to inform protocols about object lock changes. This is a backport from version 3 where these events are passed across threads. This implementation of object locks doesn't use mutexes to lock the whole data structure. In version 3, this data structure may get accessed from multiple threads and must be protected by mutex. --- proto/bgp/bgp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'proto/bgp/bgp.c') diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 2e442e16..5b0569ae 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -1467,9 +1467,9 @@ bgp_feed_end(struct channel *C) static void -bgp_start_locked(struct object_lock *lock) +bgp_start_locked(void *_p) { - struct bgp_proto *p = lock->data; + struct bgp_proto *p = _p; const struct bgp_config *cf = p->cf; if (p->p.proto_state != PS_START) @@ -1574,8 +1574,10 @@ bgp_start(struct proto *P) lock->iface = p->cf->iface; lock->vrf = p->cf->iface ? NULL : p->p.vrf; lock->type = OBJLOCK_TCP; - lock->hook = bgp_start_locked; - lock->data = p; + lock->event = (event) { + .hook = bgp_start_locked, + .data = p, + }; /* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */ if (bgp_is_dynamic(p)) -- cgit v1.2.3 From c354e8f4c199ca7dec441394156d18badac71b81 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Tue, 31 Jan 2023 13:07:46 +0100 Subject: Interface updates are asynchronous Instead of propagating interface updates as they are loaded from kernel, they are enqueued and all the notifications are called from a protocol-specific event. This change allows to break the locking loop between protocols and interfaces. Anyway, this change is based on v2 branch to keep the changes between v2 and v3 smaller. --- nest/iface.c | 203 ++++++++++++++++++++++++++++++++++++++++++++------ nest/iface.h | 66 +++++++++++++++- nest/neighbor.c | 63 +++++++++++----- nest/proto.c | 14 +++- nest/protocol.h | 4 +- nest/rt-dev.c | 4 +- proto/babel/babel.c | 2 +- proto/bfd/bfd.c | 2 +- proto/bgp/bgp.c | 2 +- proto/ospf/ospf.c | 4 +- proto/perf/perf.c | 2 +- proto/radv/radv.c | 4 +- proto/rip/rip.c | 4 +- proto/static/static.c | 2 +- sysdep/unix/krt.c | 2 +- 15 files changed, 315 insertions(+), 63 deletions(-) (limited to 'proto/bgp/bgp.c') diff --git a/nest/iface.c b/nest/iface.c index fc63dc75..740c1878 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -34,6 +34,9 @@ #include "conf/conf.h" #include "sysdep/unix/krt.h" + +static TLIST_LIST(ifsub) iface_sub_list; +static slab *iface_sub_slab; static pool *if_pool; list iface_list; @@ -140,13 +143,51 @@ if_copy(struct iface *to, struct iface *from) to->flags = from->flags | (to->flags & IF_TMP_DOWN); to->mtu = from->mtu; to->master_index = from->master_index; - to->master = from->master; + + if_unlink(to->master); + if_link(to->master = from->master); +} + +void +if_enqueue_notify_to(struct iface_notification x, struct iface_subscription *s) +{ + switch (x.type) { + case IFNOT_ADDRESS: + if (!s->ifa_notify) return; + ifa_link(x.a); + break; + case IFNOT_INTERFACE: + if (!s->if_notify) return; + if_link(x.i); + break; + case IFNOT_NEIGHBOR: + if (!s->neigh_notify) return; + neigh_link(x.n); + break; + default: + bug("Unknown interface notification type: %d", x.type); + } + + struct iface_notification *in = sl_alloc(iface_sub_slab); + *in = x; + + ifnot_add_tail(&s->queue, in); + ev_schedule(&s->event); +} + +void +if_enqueue_notify(struct iface_notification x) +{ + WALK_TLIST(ifsub, s, &iface_sub_list) + if_enqueue_notify_to(x, s); } static inline void -ifa_send_notify(struct proto *p, unsigned c, struct ifa *a) +ifa_send_notify(struct iface_subscription *s, unsigned c, struct ifa *a) { - if (p->ifa_notify && + struct proto *p = SKIP_BACK(struct proto, iface_sub, s); + + if (s->ifa_notify && (p->proto_state != PS_DOWN) && (!p->vrf || p->vrf == a->iface->master)) { @@ -154,19 +195,21 @@ ifa_send_notify(struct proto *p, unsigned c, struct ifa *a) log(L_TRACE "%s < address %N on interface %s %s", p->name, &a->prefix, a->iface->name, (c & IF_CHANGE_UP) ? "added" : "removed"); - p->ifa_notify(p, c, a); + s->ifa_notify(p, c, a); } } static void ifa_notify_change_(unsigned c, struct ifa *a) { - struct proto *p; - DBG("IFA change notification (%x) for %s:%I\n", c, a->iface->name, a->ip); - WALK_LIST(p, proto_list) - ifa_send_notify(p, c, a); + if_enqueue_notify((struct iface_notification) { + .type = IFNOT_ADDRESS, + .a = a, + .flags = c, + }); + } static inline void @@ -182,9 +225,11 @@ ifa_notify_change(unsigned c, struct ifa *a) } static inline void -if_send_notify(struct proto *p, unsigned c, struct iface *i) +if_send_notify(struct iface_subscription *s, unsigned c, struct iface *i) { - if (p->if_notify && + struct proto *p = SKIP_BACK(struct proto, iface_sub, s); + + if (s->if_notify && (p->proto_state != PS_DOWN) && (!p->vrf || p->vrf == i->master)) { @@ -197,14 +242,13 @@ if_send_notify(struct proto *p, unsigned c, struct iface *i) (c & IF_CHANGE_PREFERRED) ? "changes preferred address" : (c & IF_CHANGE_CREATE) ? "created" : "sends unknown event"); - p->if_notify(p, c, i); + s->if_notify(p, c, i); } } static void if_notify_change(unsigned c, struct iface *i) { - struct proto *p; struct ifa *a; if (i->flags & IF_JUST_CREATED) @@ -225,8 +269,11 @@ if_notify_change(unsigned c, struct iface *i) WALK_LIST(a, i->addrs) ifa_notify_change_(IF_CHANGE_DOWN, a); - WALK_LIST(p, proto_list) - if_send_notify(p, c, i); + if_enqueue_notify((struct iface_notification) { + .type = IFNOT_INTERFACE, + .i = i, + .flags = c, + }); if (c & IF_CHANGE_UP) WALK_LIST(a, i->addrs) @@ -320,6 +367,7 @@ if_update(struct iface *new) new->llv6 = i->llv6; new->sysdep = i->sysdep; memcpy(&new->addrs, &i->addrs, sizeof(i->addrs)); + memcpy(&new->neighbors, &i->neighbors, sizeof(i->neighbors)); memcpy(i, new, sizeof(*i)); i->flags &= ~IF_UP; /* IF_TMP_DOWN will be added later */ goto newif; @@ -334,9 +382,10 @@ if_update(struct iface *new) } i = mb_alloc(if_pool, sizeof(struct iface)); memcpy(i, new, sizeof(*i)); + if_link(i->master); init_list(&i->addrs); -newif: init_list(&i->neighbors); +newif: i->flags |= IF_UPDATED | IF_TMP_DOWN; /* Tmp down as we don't have addresses yet */ add_tail(&iface_list, &i->n); return i; @@ -386,31 +435,116 @@ if_end_update(void) } } +void +if_link(struct iface *i) +{ + if (i) + i->uc++; +} + +void +if_unlink(struct iface *i) +{ + if (i) + i->uc--; + /* TODO: Do some interface object cleanup */ +} + +static void +iface_notify_hook(void *_s) +{ + struct iface_subscription *s = _s; + + while (!EMPTY_TLIST(ifnot, &s->queue)) + { + struct iface_notification *n = THEAD(ifnot, &s->queue); + switch (n->type) { + case IFNOT_ADDRESS: + ifa_send_notify(s, n->flags, n->a); + ifa_unlink(n->a); + break; + case IFNOT_INTERFACE: + if_send_notify(s, n->flags, n->i); + if_unlink(n->i); + break; + case IFNOT_NEIGHBOR: + s->neigh_notify(n->n); + neigh_unlink(n->n); + break; + default: + bug("Bad interface notification type: %d", n->type); + } + + ifnot_rem_node(&s->queue, n); + sl_free(n); + } +} + + /** - * if_feed_baby - advertise interfaces to a new protocol - * @p: protocol to feed + * iface_subscribe - request interface updates + * @s: subscription structure * * When a new protocol starts, this function sends it a series * of notifications about all existing interfaces. */ void -if_feed_baby(struct proto *p) +iface_subscribe(struct iface_subscription *s) { - struct iface *i; - struct ifa *a; + ifsub_add_tail(&iface_sub_list, s); + s->event = (event) { + .hook = iface_notify_hook, + .data = s, + }; - if (!p->if_notify && !p->ifa_notify) /* shortcut */ + if (!s->if_notify && !s->ifa_notify) /* shortcut */ return; + + struct iface *i; DBG("Announcing interfaces to new protocol %s\n", p->name); WALK_LIST(i, iface_list) { - if_send_notify(p, IF_CHANGE_CREATE | ((i->flags & IF_UP) ? IF_CHANGE_UP : 0), i); + if_send_notify(s, IF_CHANGE_CREATE | ((i->flags & IF_UP) ? IF_CHANGE_UP : 0), i); + + struct ifa *a; if (i->flags & IF_UP) WALK_LIST(a, i->addrs) - ifa_send_notify(p, IF_CHANGE_CREATE | IF_CHANGE_UP, a); + ifa_send_notify(s, IF_CHANGE_CREATE | IF_CHANGE_UP, a); } } +/** + * iface_unsubscribe - unsubscribe from interface updates + * @s: subscription structure + */ +void +iface_unsubscribe(struct iface_subscription *s) +{ + ifsub_rem_node(&iface_sub_list, s); + ev_postpone(&s->event); + + WALK_TLIST_DELSAFE(ifnot, n, &s->queue) + { + switch (n->type) + { + case IFNOT_ADDRESS: + ifa_unlink(n->a); + break; + case IFNOT_INTERFACE: + if_unlink(n->i); + break; + case IFNOT_NEIGHBOR: + neigh_unlink(n->n); + break; + default: + bug("Bad interface notification type: %d", n->type); + } + + ifnot_rem_node(&s->queue, n); + sl_free(n); + } +} + /** * if_find_by_index - find interface by ifindex * @idx: ifindex @@ -600,6 +734,8 @@ ifa_update(struct ifa *a) b = mb_alloc(if_pool, sizeof(struct ifa)); memcpy(b, a, sizeof(struct ifa)); + ifa_link(b); + if_link(i); add_tail(&i->addrs, &b->n); b->flags |= IA_UPDATED; @@ -646,11 +782,29 @@ ifa_delete(struct ifa *a) if (i->flags & IF_UP) ifa_notify_change(IF_CHANGE_DOWN, b); - mb_free(b); + ifa_unlink(b); return; } } +void ifa_link(struct ifa *a) +{ + if (a) + a->uc++; +} + +void ifa_unlink(struct ifa *a) +{ + if (!a) + return; + + if (--a->uc) + return; + + if_unlink(a->iface); + mb_free(a); +} + u32 if_choose_router_id(struct iface_patt *mask, u32 old_id) { @@ -706,6 +860,7 @@ if_init(void) { if_pool = rp_new(&root_pool, "Interfaces"); init_list(&iface_list); + iface_sub_slab = sl_new(if_pool, sizeof(struct iface_notification)); strcpy(default_vrf.name, "default"); neigh_init(if_pool); } diff --git a/nest/iface.h b/nest/iface.h index fb27f99e..a3f4f30a 100644 --- a/nest/iface.h +++ b/nest/iface.h @@ -9,6 +9,7 @@ #ifndef _BIRD_IFACE_H_ #define _BIRD_IFACE_H_ +#include "lib/event.h" #include "lib/lists.h" #include "lib/tlists.h" #include "lib/ip.h" @@ -27,6 +28,7 @@ struct ifa { /* Interface address */ ip_addr opposite; /* Opposite end of a point-to-point link */ unsigned scope; /* Interface address scope */ unsigned flags; /* Analogous to iface->flags */ + unsigned uc; /* Use (link) count */ }; extern struct iface default_vrf; @@ -45,6 +47,7 @@ struct iface { struct ifa *llv6; /* Primary link-local address for IPv6 */ ip4_addr sysdep; /* Arbitrary IPv4 address for internal sysdep use */ list neighbors; /* All neighbors on this interface */ + unsigned uc; /* Use (link) count */ }; #define IF_UP 1 /* Currently just IF_ADMIN_UP */ @@ -115,12 +118,15 @@ void ifa_delete(struct ifa *); void if_start_update(void); void if_end_partial_update(struct iface *); void if_end_update(void); -void if_feed_baby(struct proto *); struct iface *if_find_by_index(unsigned); struct iface *if_find_by_name(const char *); struct iface *if_get_by_name(const char *); void if_recalc_all_preferred_addresses(void); +void if_link(struct iface *); +void if_unlink(struct iface *); +void ifa_link(struct ifa *); +void ifa_unlink(struct ifa *); /* The Neighbor Cache */ @@ -138,6 +144,7 @@ typedef struct neighbor { u16 flags; /* NEF_* flags */ s16 scope; /* Address scope, -1 for unreachable neighbors, SCOPE_HOST when it's our own address */ + uint uc; /* Use (link) count */ } neighbor; #define TLIST_PREFIX proto_neigh @@ -164,6 +171,63 @@ void neigh_ifa_up(struct ifa *a); void neigh_ifa_down(struct ifa *a); void neigh_init(struct pool *); +void neigh_link(neighbor *); +void neigh_unlink(neighbor *); + +/* + * Notification mechanism + */ + +#define TLIST_PREFIX ifnot +#define TLIST_TYPE struct iface_notification +#define TLIST_ITEM nn +#define TLIST_WANT_WALK +#define TLIST_WANT_ADD_TAIL + +struct iface_notification { + TLIST_DEFAULT_NODE; + enum { + IFNOT_INVALID, + IFNOT_ADDRESS, + IFNOT_INTERFACE, + IFNOT_NEIGHBOR, + } type; + unsigned flags; + union { + struct ifa *a; + struct iface *i; + neighbor *n; + }; +}; + +#include "lib/tlists.h" + +#define TLIST_PREFIX ifsub +#define TLIST_TYPE struct iface_subscription +#define TLIST_ITEM n +#define TLIST_WANT_WALK +#define TLIST_WANT_ADD_TAIL + +struct iface_subscription { + TLIST_DEFAULT_NODE; + + event event; + TLIST_LIST(ifnot) queue; + + void (*if_notify)(struct proto *, unsigned flags, struct iface *i); + void (*ifa_notify)(struct proto *, unsigned flags, struct ifa *a); + void (*neigh_notify)(struct neighbor *neigh); +}; + +#include "lib/tlists.h" + +void if_enqueue_notify(struct iface_notification); +void if_enqueue_notify_to(struct iface_notification x, struct iface_subscription *s); + +void iface_flush_notifications(struct iface_subscription *); +void iface_subscribe(struct iface_subscription *); +void iface_unsubscribe(struct iface_subscription *); + /* * Interface Pattern Lists */ diff --git a/nest/neighbor.c b/nest/neighbor.c index c27db989..88ac2860 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -258,13 +258,15 @@ neigh_find(struct proto *p, ip_addr a, struct iface *iface, uint flags) add_tail((scope >= 0) ? &iface->neighbors : &sticky_neigh_list, &n->if_n); proto_neigh_add_tail(&p->neighbors, n); n->addr = a; - n->ifa = addr; - n->iface = iface; - n->ifreq = ifreq; + ifa_link(n->ifa = addr); + if_link(n->iface = iface); + if_link(n->ifreq = ifreq); n->proto = p; n->flags = flags; n->scope = scope; + neigh_link(n); + return n; } @@ -309,19 +311,20 @@ neigh_dump_all(void) static inline void neigh_notify(neighbor *n) { - if (n->proto && n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) - n->proto->neigh_notify(n); + if_enqueue_notify_to((struct iface_notification) { .type = IFNOT_NEIGHBOR, .n = n, }, &n->proto->iface_sub); } static void neigh_up(neighbor *n, struct iface *i, struct ifa *a, int scope) { DBG("Waking up sticky neighbor %I\n", n->addr); - n->iface = i; - n->ifa = a; + if_link(n->iface = i); + ifa_link(n->ifa = a); + n->scope = scope; - rem_node(&n->if_n); + rem_node(&n->if_n); /* HACK: Here the neighbor is always in the sticky list, + regardless whether it is sticky or not */ add_tail(&i->neighbors, &n->if_n); neigh_notify(n); @@ -331,25 +334,48 @@ static void neigh_down(neighbor *n) { DBG("Flushing neighbor %I on %s\n", n->addr, n->iface->name); - n->iface = NULL; - n->ifa = NULL; + n->scope = -1; rem_node(&n->if_n); add_tail(&sticky_neigh_list, &n->if_n); + ifa_unlink(n->ifa); + n->ifa = NULL; + + if_unlink(n->iface); + n->iface = NULL; + neigh_notify(n); } -static inline void -neigh_free(neighbor *n) +void +neigh_link(neighbor *n) { - proto_neigh_rem_node(&n->proto->neighbors, n); + n->uc++; +} + +void +neigh_unlink(neighbor *n) +{ + if (--n->uc) + return; + + struct proto *p = n->proto; + proto_neigh_rem_node(&p->neighbors, n); + + if ((p->proto_state == PS_DOWN) && EMPTY_TLIST(proto_neigh, &p->neighbors)) + ev_schedule(p->event); + n->proto = NULL; rem_node(&n->n); rem_node(&n->if_n); + ifa_unlink(n->ifa); + if_unlink(n->iface); + if_unlink(n->ifreq); + sl_free(n); } @@ -399,7 +425,8 @@ neigh_update(neighbor *n, struct iface *iface) { if (ifa != n->ifa) { - n->ifa = ifa; + ifa_unlink(n->ifa); + ifa_link(n->ifa = ifa); neigh_notify(n); } @@ -413,7 +440,7 @@ neigh_update(neighbor *n, struct iface *iface) if ((n->scope < 0) && !(n->flags & NEF_STICKY)) { - neigh_free(n); + neigh_unlink(n); return; } @@ -534,8 +561,10 @@ neigh_ifa_down(struct ifa *a) void neigh_prune(struct proto *p) { - while (!EMPTY_TLIST(proto_neigh, &p->neighbors)) - neigh_free(THEAD(proto_neigh, &p->neighbors)); + WALK_TLIST_DELSAFE(proto_neigh, n, &p->neighbors) + neigh_unlink(n); + + ASSERT_DIE(EMPTY_TLIST(proto_neigh, &p->neighbors)); } /** diff --git a/nest/proto.c b/nest/proto.c index 2614943c..39e8b999 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -57,7 +57,11 @@ static inline void channel_reset_limit(struct channel_limit *l); static inline int proto_is_done(struct proto *p) -{ return (p->proto_state == PS_DOWN) && (p->active_channels == 0); } +{ + return (p->proto_state == PS_DOWN) + && (p->active_channels == 0) + && EMPTY_TLIST(proto_neigh, &p->neighbors); +} static inline int channel_is_active(struct channel *c) { return (c->channel_state == CS_START) || (c->channel_state == CS_UP); } @@ -962,14 +966,18 @@ proto_event(void *ptr) if (p->do_start) { - if_feed_baby(p); + iface_subscribe(&p->iface_sub); p->do_start = 0; } if (p->do_stop) { + iface_unsubscribe(&p->iface_sub); + neigh_prune(p); + p->do_stop = 0; } + if (proto_is_done(p)) { if (p->proto->cleanup) @@ -1860,7 +1868,6 @@ proto_do_stop(struct proto *p) p->down_sched = 0; p->gr_recovery = 0; - if (p->main_source) { rt_unlock_source(p->main_source); @@ -1877,7 +1884,6 @@ static void proto_do_down(struct proto *p) { p->down_code = 0; - neigh_prune(p); rfree(p->pool); p->pool = NULL; diff --git a/nest/protocol.h b/nest/protocol.h index 9fbe9158..6d5714a7 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -171,6 +171,7 @@ struct proto { struct rte_src *main_source; /* Primary route source */ struct iface *vrf; /* Related VRF instance, NULL if global */ TLIST_LIST(proto_neigh) neighbors; /* List of neighbor structures */ + struct iface_subscription iface_sub; /* Interface notification subscription */ const char *name; /* Name of this instance (== cf->name) */ u32 debug; /* Debugging flags */ @@ -210,10 +211,7 @@ struct proto { * feed_end Notify channel about finish of route feeding. */ - void (*if_notify)(struct proto *, unsigned flags, struct iface *i); - void (*ifa_notify)(struct proto *, unsigned flags, struct ifa *a); void (*rt_notify)(struct proto *, struct channel *, struct network *net, struct rte *new, struct rte *old); - void (*neigh_notify)(struct neighbor *neigh); int (*preexport)(struct channel *, struct rte *rt); void (*reload_routes)(struct channel *); void (*feed_begin)(struct channel *, int initial); diff --git a/nest/rt-dev.c b/nest/rt-dev.c index 7932b8b7..42b6b499 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -141,8 +141,8 @@ dev_init(struct proto_config *CF) proto_configure_channel(P, &p->ip4_channel, cf->ip4_channel); proto_configure_channel(P, &p->ip6_channel, cf->ip6_channel); - P->if_notify = dev_if_notify; - P->ifa_notify = dev_ifa_notify; + P->iface_sub.if_notify = dev_if_notify; + P->iface_sub.ifa_notify = dev_ifa_notify; return P; } diff --git a/proto/babel/babel.c b/proto/babel/babel.c index e1b31e86..15135922 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -2424,7 +2424,7 @@ babel_init(struct proto_config *CF) proto_configure_channel(P, &p->ip4_channel, cf->ip4_channel); proto_configure_channel(P, &p->ip6_channel, cf->ip6_channel); - P->if_notify = babel_if_notify; + P->iface_sub.if_notify = babel_if_notify; P->rt_notify = babel_rt_notify; P->preexport = babel_preexport; P->rte_better = babel_rte_better; diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 69e42f3d..b2655952 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -1012,7 +1012,7 @@ bfd_init(struct proto_config *c) { struct proto *p = proto_new(c); - p->neigh_notify = bfd_neigh_notify; + p->iface_sub.neigh_notify = bfd_neigh_notify; return p; } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 5b0569ae..3531ce51 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -1686,7 +1686,7 @@ bgp_init(struct proto_config *CF) P->rt_notify = bgp_rt_notify; P->preexport = bgp_preexport; - P->neigh_notify = bgp_neigh_notify; + P->iface_sub.neigh_notify = bgp_neigh_notify; P->reload_routes = bgp_reload_routes; P->feed_begin = bgp_feed_begin; P->feed_end = bgp_feed_end; diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index ad4b2d14..77a14b7a 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -370,8 +370,8 @@ ospf_init(struct proto_config *CF) P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); P->rt_notify = ospf_rt_notify; - P->if_notify = ospf_if_notify; - P->ifa_notify = cf->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3; + P->iface_sub.if_notify = ospf_if_notify; + P->iface_sub.ifa_notify = cf->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3; P->preexport = ospf_preexport; P->reload_routes = ospf_reload_routes; P->feed_begin = ospf_feed_begin; diff --git a/proto/perf/perf.c b/proto/perf/perf.c index 75e405f0..8941d582 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -266,7 +266,7 @@ perf_init(struct proto_config *CF) switch (p->mode) { case PERF_MODE_IMPORT: - P->ifa_notify = perf_ifa_notify; + P->iface_sub.ifa_notify = perf_ifa_notify; break; case PERF_MODE_EXPORT: P->rt_notify = perf_rt_notify; diff --git a/proto/radv/radv.c b/proto/radv/radv.c index ee1da36c..8a7440e6 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -579,8 +579,8 @@ radv_init(struct proto_config *CF) P->preexport = radv_preexport; P->rt_notify = radv_rt_notify; - P->if_notify = radv_if_notify; - P->ifa_notify = radv_ifa_notify; + P->iface_sub.if_notify = radv_if_notify; + P->iface_sub.ifa_notify = radv_ifa_notify; return P; } diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 93b0d528..abbd83f2 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -1135,9 +1135,9 @@ rip_init(struct proto_config *CF) P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->if_notify = rip_if_notify; + P->iface_sub.if_notify = rip_if_notify; P->rt_notify = rip_rt_notify; - P->neigh_notify = rip_neigh_notify; + P->iface_sub.neigh_notify = rip_neigh_notify; P->reload_routes = rip_reload_routes; P->rte_better = rip_rte_better; P->rte_igp_metric = rip_rte_igp_metric; diff --git a/proto/static/static.c b/proto/static/static.c index bb93305e..ba0984b5 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -463,7 +463,7 @@ static_init(struct proto_config *CF) P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->neigh_notify = static_neigh_notify; + P->iface_sub.neigh_notify = static_neigh_notify; P->reload_routes = static_reload_routes; P->rte_better = static_rte_better; P->rte_mergable = static_rte_mergable; diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 7ec30eb0..52822c3d 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -1046,7 +1046,7 @@ krt_init(struct proto_config *CF) p->p.preexport = krt_preexport; p->p.rt_notify = krt_rt_notify; - p->p.if_notify = krt_if_notify; + p->p.iface_sub.if_notify = krt_if_notify; p->p.reload_routes = krt_reload_routes; p->p.feed_end = krt_feed_end; -- cgit v1.2.3 From 0bb04d5390f21b0c96fc4894ba5d5510c541f0ef Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Sun, 5 Feb 2023 19:54:06 +0100 Subject: BGP: opening and closing listening socket asynchronously --- proto/bgp/bgp.c | 231 ++++++++++++++++++++++++++++++++------------------------ proto/bgp/bgp.h | 14 +++- 2 files changed, 146 insertions(+), 99 deletions(-) (limited to 'proto/bgp/bgp.c') diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 3531ce51..9c5d483a 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -127,7 +127,11 @@ #include "bgp.h" +static void bgp_listen_create(void *); + static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */ +static list STATIC_LIST_INIT(bgp_listen_pending); /* Global list of listening socket open requests */ +static event bgp_listen_event = { .hook = bgp_listen_create }; static void bgp_connect(struct bgp_proto *p); @@ -139,73 +143,33 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd); static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); static void bgp_listen_sock_err(sock *sk UNUSED, int err); +static void bgp_initiate_disable(struct bgp_proto *p, int err_val); -/** - * bgp_open - open a BGP instance - * @p: BGP instance - * - * This function allocates and configures shared BGP resources, mainly listening - * sockets. Should be called as the last step during initialization (when lock - * is acquired and neighbor is ready). When error, caller should change state to - * PS_DOWN and return immediately. - */ -static int -bgp_open(struct bgp_proto *p) +static inline int +bgp_setup_auth(struct bgp_proto *p, int enable) { - struct bgp_socket *bs = NULL; - struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; - ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : - (p->ipv4 ? IPA_NONE4 : IPA_NONE6); - uint port = p->cf->local_port; - uint flags = p->cf->free_bind ? SKF_FREEBIND : 0; - uint flag_mask = SKF_FREEBIND; - - /* We assume that cf->iface is defined iff cf->local_ip is link-local */ + if (p->cf->password && p->listen.sock) + { + ip_addr prefix = p->cf->remote_ip; + int pxlen = -1; - WALK_LIST(bs, bgp_sockets) - if (ipa_equal(bs->sk->saddr, addr) && - (bs->sk->sport == port) && - (bs->sk->iface == ifa) && - (bs->sk->vrf == p->p.vrf) && - ((bs->sk->flags & flag_mask) == flags)) + if (p->cf->remote_range) { - bs->uc++; - p->sock = bs; - return 0; + prefix = net_prefix(p->cf->remote_range); + pxlen = net_pxlen(p->cf->remote_range); } - sock *sk = sk_new(proto_pool); - sk->type = SK_TCP_PASSIVE; - sk->ttl = 255; - sk->saddr = addr; - sk->sport = port; - sk->iface = ifa; - sk->vrf = p->p.vrf; - sk->flags = flags; - sk->tos = IP_PREC_INTERNET_CONTROL; - sk->rbsize = BGP_RX_BUFFER_SIZE; - sk->tbsize = BGP_TX_BUFFER_SIZE; - sk->rx_hook = bgp_incoming_connection; - sk->err_hook = bgp_listen_sock_err; - - if (sk_open(sk) < 0) - goto err; - - bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); - bs->sk = sk; - bs->uc = 1; - p->sock = bs; - sk->data = bs; - - add_tail(&bgp_sockets, &bs->n); + int rv = sk_set_md5_auth(p->listen.sock->sk, + p->cf->local_ip, prefix, pxlen, p->cf->iface, + enable ? p->cf->password : NULL, p->cf->setkey); - return 0; + if (rv < 0) + sk_log_error(p->listen.sock->sk, p->p.name); -err: - sk_log_error(sk, p->p.name); - log(L_ERR "%s: Cannot open listening socket", p->p.name); - rfree(sk); - return -1; + return rv; + } + else + return 0; } /** @@ -217,43 +181,119 @@ err: static void bgp_close(struct bgp_proto *p) { - struct bgp_socket *bs = p->sock; + struct bgp_listen_request *req = &p->listen; + struct bgp_socket *bs = req->sock; - ASSERT(bs && bs->uc); + ASSERT(bs); - if (--bs->uc) - return; + req->sock = NULL; + rem_node(&req->n); - rfree(bs->sk); - rem_node(&bs->n); - mb_free(bs); + if (EMPTY_LIST(bs->requests)) + ev_schedule(&bgp_listen_event); } -static inline int -bgp_setup_auth(struct bgp_proto *p, int enable) +/** + * bgp_open - open a BGP instance + * @p: BGP instance + * + * This function allocates and configures shared BGP resources, mainly listening + * sockets. Should be called as the last step during initialization (when lock + * is acquired and neighbor is ready). When error, caller should change state to + * PS_DOWN and return immediately. + */ +static void +bgp_open(struct bgp_proto *p) +{ + struct bgp_listen_request *req = &p->listen; + /* We assume that cf->iface is defined iff cf->local_ip is link-local */ + req->iface = p->cf->strict_bind ? p->cf->iface : NULL; + req->vrf = p->p.vrf; + req->addr = p->cf->strict_bind ? p->cf->local_ip : + (p->ipv4 ? IPA_NONE4 : IPA_NONE6); + req->port = p->cf->local_port; + req->flags = p->cf->free_bind ? SKF_FREEBIND : 0; + + add_tail(&bgp_listen_pending, &req->n); + ev_schedule(&bgp_listen_event); +} + +static void +bgp_listen_create(void *_ UNUSED) { - if (p->cf->password) + uint flag_mask = SKF_FREEBIND; + + struct bgp_listen_request *req; + WALK_LIST_FIRST(req, bgp_listen_pending) { - ip_addr prefix = p->cf->remote_ip; - int pxlen = -1; + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req); + rem_node(&req->n); + + /* First try to find existing socket */ + struct bgp_socket *bs; + WALK_LIST(bs, bgp_sockets) + if (ipa_equal(bs->sk->saddr, req->addr) && + (bs->sk->sport == req->port) && + (bs->sk->iface == req->iface) && + (bs->sk->vrf == req->vrf) && + ((bs->sk->flags & flag_mask) == req->flags)) + break; - if (p->cf->remote_range) + /* Not found any */ + if (!NODE_VALID(bs)) { - prefix = net_prefix(p->cf->remote_range); - pxlen = net_pxlen(p->cf->remote_range); - } + sock *sk = sk_new(proto_pool); + sk->type = SK_TCP_PASSIVE; + sk->ttl = 255; + sk->saddr = req->addr; + sk->sport = req->port; + sk->iface = req->iface; + sk->vrf = req->vrf; + sk->flags = req->flags; + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->rbsize = BGP_RX_BUFFER_SIZE; + sk->tbsize = BGP_TX_BUFFER_SIZE; + sk->rx_hook = bgp_incoming_connection; + sk->err_hook = bgp_listen_sock_err; + + if (sk_open(sk) < 0) + { + sk_log_error(sk, p->p.name); + log(L_ERR "%s: Cannot open listening socket", p->p.name); + rfree(sk); + bgp_initiate_disable(p, BEM_NO_SOCKET); - int rv = sk_set_md5_auth(p->sock->sk, - p->cf->local_ip, prefix, pxlen, p->cf->iface, - enable ? p->cf->password : NULL, p->cf->setkey); + continue; + } - if (rv < 0) - sk_log_error(p->sock->sk, p->p.name); + bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); + bs->sk = sk; + sk->data = bs; - return rv; + init_list(&bs->requests); + add_tail(&bgp_sockets, &bs->n); + } + + add_tail(&bs->requests, &req->n); + req->sock = bs; + + if (bgp_setup_auth(p, 1) < 0) + { + bgp_close(p); + bgp_initiate_disable(p, BEM_INVALID_MD5); + } } - else - return 0; + + /* Cleanup leftover listening sockets */ + struct bgp_socket *bs; + node *nxt; + WALK_LIST_DELSAFE(bs, nxt, bgp_sockets) + if (EMPTY_LIST(bs->requests)) + { + rfree(bs->sk); + rem_node(&bs->n); + mb_free(bs); + } } static inline struct bgp_channel * @@ -296,13 +336,7 @@ bgp_startup_timeout(timer *t) static void bgp_initiate(struct bgp_proto *p) { - int err_val; - - if (bgp_open(p) < 0) - { err_val = BEM_NO_SOCKET; goto err1; } - - if (bgp_setup_auth(p, 1) < 0) - { err_val = BEM_INVALID_MD5; goto err2; } + bgp_open(p); if (p->cf->bfd) bgp_update_bfd(p, p->cf->bfd); @@ -315,12 +349,11 @@ bgp_initiate(struct bgp_proto *p) } else bgp_startup(p); +} - return; - -err2: - bgp_close(p); -err1: +static void +bgp_initiate_disable(struct bgp_proto *p, int err_val) +{ p->p.disabled = 1; bgp_store_error(p, NULL, BE_MISC, err_val); @@ -1146,12 +1179,15 @@ static struct bgp_proto * bgp_find_proto(sock *sk) { struct bgp_proto *best = NULL; - struct bgp_proto *p; + struct bgp_socket *bs = sk->data; + struct bgp_listen_request *req; /* sk->iface is valid only if src or dst address is link-local */ int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr); - WALK_LIST(p, proto_list) + WALK_LIST(req, bs->requests) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req); if ((p->p.proto == &proto_bgp) && (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) && (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) && @@ -1165,6 +1201,7 @@ bgp_find_proto(sock *sk) if (!bgp_is_dynamic(p)) break; } + } return best; } diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 2808d479..5f1a35ce 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -266,8 +266,8 @@ struct bgp_caps { struct bgp_socket { node n; /* Node in global bgp_sockets */ + list requests; /* Listen requests */ sock *sk; /* Real listening socket */ - u32 uc; /* Use count */ }; struct bgp_stats { @@ -302,6 +302,16 @@ struct bgp_conn { uint hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; +struct bgp_listen_request { + node n; /* Node in bgp_socket / pending list */ + struct bgp_socket *sock; /* Assigned socket */ + ip_addr addr; + struct iface *iface; + struct iface *vrf; + uint port; + uint flags; +}; + struct bgp_proto { struct proto p; const struct bgp_config *cf; /* Shortcut to BGP configuration */ @@ -333,7 +343,7 @@ struct bgp_proto { struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ struct object_lock *lock; /* Lock for neighbor connection */ struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ - struct bgp_socket *sock; /* Shared listening socket */ + struct bgp_listen_request listen; /* Shared listening socket */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */ struct bgp_stats stats; /* BGP statistics */ -- cgit v1.2.3