diff options
author | Maria Matejka <mq@ucw.cz> | 2021-09-27 16:40:28 +0200 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2021-11-22 19:05:44 +0100 |
commit | f0507f05ce57398e135651896dace4cb68eeed54 (patch) | |
tree | 44bfd6148689af15f4f5469b2f37bca55c3e7327 /nest | |
parent | 3b20722a1fc777c27ab2e0451d0ea3fee7fa81a2 (diff) |
Route sources have an explicit owner
This commit prevents use-after-free of routes belonging to protocols
which have already been destroyed. It also delays each protocol's
shutdown until all of its routes have been finally propagated through
all the pipes down to the appropriate exports.
The use-after-free was somewhat hypothetical, yet theoretically possible
in rare conditions: when one BGP protocol authors a lot of routes and the
user deletes that protocol by reconfiguring at the same time as a next hop
update is requested, rte_better() may be called on a not-yet-pruned
network prefix after the owner protocol has already been freed.
In parallel execution environments, this would become an inter-thread
use-after-free, possibly causing heisenbugs or other nasty problems.
Diffstat (limited to 'nest')
-rw-r--r-- | nest/proto.c | 12 | ||||
-rw-r--r-- | nest/protocol.h | 4 | ||||
-rw-r--r-- | nest/route.h | 62 | ||||
-rw-r--r-- | nest/rt-attr.c | 123 | ||||
-rw-r--r-- | nest/rt-dev.c | 2 | ||||
-rw-r--r-- | nest/rt-show.c | 6 | ||||
-rw-r--r-- | nest/rt-table.c | 37 |
7 files changed, 186 insertions, 60 deletions
diff --git a/nest/proto.c b/nest/proto.c index f8e1ba31..930fad1d 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -2329,10 +2329,17 @@ channel_reset_limit(struct channel *c, struct limit *l, int dir) c->limit_active &= ~(1 << dir); } +static struct rte_owner_class default_rte_owner_class; + static inline void proto_do_start(struct proto *p) { p->active = 1; + + rt_init_sources(&p->sources, p->name, proto_event_list(p)); + if (!p->sources.class) + p->sources.class = &default_rte_owner_class; + if (!p->cf->late_if_feed) if_feed_baby(p); } @@ -2341,10 +2348,8 @@ static void proto_do_up(struct proto *p) { if (!p->main_source) - { p->main_source = rt_get_source(p, 0); - rt_lock_source(p->main_source); - } + // Locked automaticaly proto_start_channels(p); @@ -2371,6 +2376,7 @@ proto_do_stop(struct proto *p) } proto_stop_channels(p); + rt_destroy_sources(&p->sources, p->event); p->do_stop = 1; proto_send_event(p); diff --git a/nest/protocol.h b/nest/protocol.h index 981ca96a..440297a1 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -78,7 +78,6 @@ struct protocol { int (*start)(struct proto *); /* Start the instance */ int (*shutdown)(struct proto *); /* Stop the instance */ void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ - void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ void (*show_proto_info)(struct proto *); /* Show protocol info (for `show protocols all' command) */ void (*copy_config)(struct proto_config *, struct proto_config *); /* Copy config from given protocol instance */ @@ -146,6 +145,7 @@ struct proto { list channels; /* List of channels to rtables (struct channel) */ struct channel *main_channel; /* Primary channel */ struct rte_src *main_source; /* Primary route source */ + struct rte_owner sources; /* Route source owner structure */ 
struct iface *vrf; /* Related VRF instance, NULL if global */ const char *name; /* Name of this instance (== cf->name) */ @@ -360,7 +360,7 @@ void proto_notify_state(struct proto *p, unsigned state); */ static inline int proto_is_inactive(struct proto *p) -{ return (p->active_channels == 0) && (p->active_coroutines == 0); } +{ return (p->active_channels == 0) && (p->active_coroutines == 0) && (p->sources.uc == 0); } /* diff --git a/nest/route.h b/nest/route.h index 310cea92..a01eff1a 100644 --- a/nest/route.h +++ b/nest/route.h @@ -15,6 +15,8 @@ #include "lib/bitmap.h" #include "lib/resource.h" #include "lib/net.h" +#include "lib/hash.h" +#include "lib/event.h" #include <stdatomic.h> @@ -579,10 +581,10 @@ struct nexthop { struct rte_src { struct rte_src *next; /* Hash chain */ - struct proto *proto; /* Protocol the source is based on */ + struct rte_owner *owner; /* Route source owner */ u32 private_id; /* Private ID, assigned by the protocol */ u32 global_id; /* Globally unique ID of the source */ - unsigned uc; /* Use count */ + _Atomic u64 uc; /* Use count */ }; @@ -720,11 +722,57 @@ typedef struct ea_list { #define EALF_BISECT 2 /* Use interval bisection for searching */ #define EALF_CACHED 4 /* Attributes belonging to cached rta */ -struct rte_src *rt_find_source(struct proto *p, u32 id); -struct rte_src *rt_get_source(struct proto *p, u32 id); -static inline void rt_lock_source(struct rte_src *src) { src->uc++; } -static inline void rt_unlock_source(struct rte_src *src) { src->uc--; } -void rt_prune_sources(void); +struct rte_owner_class { + void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ + int (*rte_better)(struct rte *, struct rte *); + int (*rte_mergable)(struct rte *, struct rte *); + u32 (*rte_igp_metric)(struct rte *); +}; + +struct rte_owner { + struct rte_owner_class *class; + int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); + HASH(struct 
rte_src) hash; + const char *name; + u32 hash_key; + u32 uc; + event_list *list; + event *prune; + event *stop; +}; + +DEFINE_DOMAIN(attrs); +extern DOMAIN(attrs) attrs_domain; + +#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain) +#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain) + +#define RTE_SRC_PU_SHIFT 44 +#define RTE_SRC_IN_PROGRESS (1ULL << RTE_SRC_PU_SHIFT) + +struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id); +#define rt_get_source(p, id) rt_get_source_o(&(p)->sources, (id)) +static inline void rt_lock_source(struct rte_src *src) +{ + u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel); + ASSERT_DIE(uc > 0); +} + +static inline void rt_unlock_source(struct rte_src *src) +{ + u64 uc = atomic_fetch_add_explicit(&src->uc, RTE_SRC_IN_PROGRESS, memory_order_acq_rel); + u64 pending = uc >> RTE_SRC_PU_SHIFT; + uc &= RTE_SRC_IN_PROGRESS - 1; + + ASSERT_DIE(uc > pending); + if (uc == pending + 1) + ev_send(src->owner->list, src->owner->prune); + + atomic_fetch_sub_explicit(&src->uc, RTE_SRC_IN_PROGRESS + 1, memory_order_acq_rel); +} + +void rt_init_sources(struct rte_owner *, const char *name, event_list *list); +void rt_destroy_sources(struct rte_owner *, event *); struct ea_walk_state { ea_list *eattrs; /* Ccurrent ea_list, initially set by caller */ diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 77fd3c3b..f7e33d72 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -85,6 +85,8 @@ const char * rta_dest_names[RTD_MAX] = { [RTD_PROHIBIT] = "prohibited", }; +DOMAIN(attrs) attrs_domain; + pool *rta_pool; static slab *rta_slab_[4]; @@ -96,16 +98,14 @@ static struct idm src_ids; /* rte source hash */ -#define RSH_KEY(n) n->proto, n->private_id +#define RSH_KEY(n) n->private_id #define RSH_NEXT(n) n->next -#define RSH_EQ(p1,n1,p2,n2) p1 == p2 && n1 == n2 -#define RSH_FN(p,n) p->hash_key ^ u32_hash(n) +#define RSH_EQ(n1,n2) n1 == n2 +#define RSH_FN(n) u32_hash(n) #define RSH_REHASH rte_src_rehash #define RSH_PARAMS /2, *2, 1, 1, 8, 20 
-#define RSH_INIT_ORDER 6 - -static HASH(struct rte_src) src_hash; +#define RSH_INIT_ORDER 2 static void rte_src_init(void) @@ -113,55 +113,134 @@ rte_src_init(void) rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src)); idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE); - - HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER); } - HASH_DEFINE_REHASH_FN(RSH, struct rte_src) -struct rte_src * -rt_find_source(struct proto *p, u32 id) +static struct rte_src * +rt_find_source(struct rte_owner *p, u32 id) { - return HASH_FIND(src_hash, RSH, p, id); + return HASH_FIND(p->hash, RSH, id); } struct rte_src * -rt_get_source(struct proto *p, u32 id) +rt_get_source_o(struct rte_owner *p, u32 id) { + if (p->stop) + bug("Stopping route owner asked for another source."); + struct rte_src *src = rt_find_source(p, id); if (src) + { + UNUSED u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel); return src; + } + RTA_LOCK; src = sl_allocz(rte_src_slab); - src->proto = p; + src->owner = p; src->private_id = id; src->global_id = idm_alloc(&src_ids); - src->uc = 0; - HASH_INSERT2(src_hash, RSH, rta_pool, src); + atomic_store_explicit(&src->uc, 1, memory_order_release); + p->uc++; + + HASH_INSERT2(p->hash, RSH, rta_pool, src); + if (config->table_debug) + log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now", + p->name, src->private_id, src->global_id, p->uc); + + RTA_UNLOCK; return src; } +static inline void +rt_done_sources(struct rte_owner *o) +{ + if (o->stop->list) + ev_send(o->stop->list, o->stop); + else + ev_send(o->list, o->stop); +} + void -rt_prune_sources(void) +rt_prune_sources(void *data) { - HASH_WALK_FILTER(src_hash, next, src, sp) + struct rte_owner *o = data; + + HASH_WALK_FILTER(o->hash, next, src, sp) { - if (src->uc == 0) + u64 uc; + while ((uc = atomic_load_explicit(&src->uc, memory_order_acquire)) >> RTE_SRC_PU_SHIFT) + ; + + if (uc == 0) { - HASH_DO_REMOVE(src_hash, RSH, sp); + o->uc--; + + HASH_DO_REMOVE(o->hash, RSH, sp); + + 
RTA_LOCK; idm_free(&src_ids, src->global_id); sl_free(rte_src_slab, src); + RTA_UNLOCK; } } HASH_WALK_FILTER_END; - HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool); + RTA_LOCK; + HASH_MAY_RESIZE_DOWN(o->hash, RSH, rta_pool); + + if (o->stop && !o->uc) + { + rfree(o->prune); + RTA_UNLOCK; + + if (config->table_debug) + log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name); + + rt_done_sources(o); + } + else + RTA_UNLOCK; } +void +rt_init_sources(struct rte_owner *o, const char *name, event_list *list) +{ + RTA_LOCK; + HASH_INIT(o->hash, rta_pool, RSH_INIT_ORDER); + o->hash_key = random_u32(); + o->uc = 0; + o->name = name; + o->prune = ev_new_init(rta_pool, rt_prune_sources, o); + o->stop = NULL; + o->list = list; + RTA_UNLOCK; +} + +void +rt_destroy_sources(struct rte_owner *o, event *done) +{ + o->stop = done; + + if (!o->uc) + { + if (config->table_debug) + log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name); + + RTA_LOCK; + rfree(o->prune); + RTA_UNLOCK; + + rt_done_sources(o); + } + else + if (config->table_debug) + log(L_TRACE "Source owner %s destroy requested. 
Remaining %u rte_src's to prune.", o->name, o->uc); +} /* * Multipath Next Hop @@ -1328,6 +1407,8 @@ rta_show(struct cli *c, rta *a) void rta_init(void) { + attrs_domain = DOMAIN_NEW(attrs, "Attributes"); + rta_pool = rp_new(&root_pool, "Attributes"); rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); diff --git a/nest/rt-dev.c b/nest/rt-dev.c index 5d1e57b3..c1251675 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -68,6 +68,7 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) /* Use iface ID as local source ID */ struct rte_src *src = rt_get_source(P, ad->iface->index); rte_update(c, net, NULL, src); + rt_unlock_source(src); } else if (flags & IF_CHANGE_UP) { @@ -93,6 +94,7 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) }; rte_update(c, net, &e0, src); + rt_unlock_source(src); } } diff --git a/nest/rt-show.c b/nest/rt-show.c index d942b8e1..8196903d 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -56,7 +56,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary if (d->verbose && !rta_is_cached(a) && a->eattrs) ea_normalize(a->eattrs); - get_route_info = e->src->proto->proto->get_route_info; + get_route_info = e->src->owner->class ? e->src->owner->class->get_route_info : NULL; if (get_route_info) get_route_info(e, info); else @@ -66,7 +66,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary rt_show_table(c, d); cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest), - e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); + e->src->owner->name, tm, from, primary ? (sync_error ? " !" 
: " *") : "", info); if (a->dest == RTD_UNICAST) for (nh = &(a->nh); nh; nh = nh->next) @@ -211,7 +211,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) } } - if (d->show_protocol && (d->show_protocol != e.src->proto)) + if (d->show_protocol && (&d->show_protocol->sources != e.src->owner)) goto skip; if (f_run(d->filter, &e, c->show_pool, 0) > F_ACCEPT) diff --git a/nest/rt-table.c b/nest/rt-table.c index b4cd0448..c67f5bf8 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -367,16 +367,16 @@ rte_better(rte *new, rte *old) return 1; if (new->attrs->pref < old->attrs->pref) return 0; - if (new->src->proto->proto != old->src->proto->proto) + if (new->src->owner->class != old->src->owner->class) { /* * If the user has configured protocol preferences, so that two different protocols * have the same preference, try to break the tie by comparing addresses. Not too * useful, but keeps the ordering of routes unambiguous. */ - return new->src->proto->proto > old->src->proto->proto; + return new->src->owner->class > old->src->owner->class; } - if (better = new->src->proto->rte_better) + if (better = new->src->owner->class->rte_better) return better(new, old); return 0; } @@ -392,10 +392,10 @@ rte_mergable(rte *pri, rte *sec) if (pri->attrs->pref != sec->attrs->pref) return 0; - if (pri->src->proto->proto != sec->src->proto->proto) + if (pri->src->owner->class != sec->src->owner->class) return 0; - if (mergable = pri->src->proto->rte_mergable) + if (mergable = pri->src->owner->class->rte_mergable) return mergable(pri, sec); return 0; @@ -1269,10 +1269,10 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr { if (!old->generation && !new->generation) bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u", - c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id); + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, 
old->src->global_id); log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u", - c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id); + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); } if (new && rte_same(old, new)) @@ -1341,8 +1341,8 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr /* If routes are not sorted, find the best route and move it on the first position. There are several optimized cases. */ - if (src->proto->rte_recalculate && - src->proto->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best)) + if (src->owner->rte_recalculate && + src->owner->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best)) goto do_recalculate; if (new_stored && rte_better(&new_stored->rte, old_best)) @@ -2237,8 +2237,6 @@ again: /* state change 2->0, 3->1 */ tab->prune_state &= 1; - rt_prune_sources(); - uint flushed_channels = 0; /* Close flushed channels */ @@ -2409,7 +2407,6 @@ rt_export_cleanup(void *data) done:; struct rt_import_hook *ih; node *x; - _Bool imports_stopped = 0; WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) if (ih->import_state == TIS_WAITING) if (!first_export || (first_export->seq >= ih->flush_seq)) @@ -2419,16 +2416,8 @@ done:; rem_node(&ih->n); mb_free(ih); rt_unlock_table(tab); - imports_stopped = 1; } - if (imports_stopped) - { - if (config->table_debug) - log(L_TRACE "%s: Sources pruning routine requested", tab->name); - - rt_prune_sources(); - } if (EMPTY_LIST(tab->pending_exports) && tm_active(tab->export_timer)) tm_stop(tab->export_timer); @@ -2610,8 +2599,8 @@ rt_next_hop_update_net(rtable *tab, net *n) /* Call a pre-comparison hook */ /* Not really an efficient way to compute this */ - if (e->rte.src->proto->rte_recalculate) - e->rte.src->proto->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); + if (e->rte.src->owner->rte_recalculate) 
+ e->rte.src->owner->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); updates[pos++] = (struct rte_multiupdate) { .old = e, @@ -3083,8 +3072,8 @@ rt_get_igp_metric(rte *rt) if (rt->attrs->source == RTS_DEVICE) return 0; - if (rt->src->proto->rte_igp_metric) - return rt->src->proto->rte_igp_metric(rt); + if (rt->src->owner->class->rte_igp_metric) + return rt->src->owner->class->rte_igp_metric(rt); return IGP_METRIC_UNKNOWN; } |