author     Maria Matejka <mq@ucw.cz>    2021-09-29 16:15:13 +0200
committer  Maria Matejka <mq@ucw.cz>    2021-11-22 19:05:44 +0100
commit     878eeec12bf020c9e7460040d225a929bbbd2bd2
tree       e60ffcdbcf26972912271aba2353c572f02c679f
parent     c7d0c5b2523a8cbfcaee9a235955dd5e58fab671
Routing tables now have their own loops.
This basically means that:
* there are some more levels of indirection and asynchronicity, mostly
  in cleanup procedures, requiring correct lock ordering (see the
  sketches below the file list)
* all the internal table operations (prune, next hop update) are done
without blocking the other parts of BIRD
* the protocols may get their own loops very soon
-rw-r--r--  filter/f-inst.c        |   4
-rw-r--r--  nest/proto.c           |  75
-rw-r--r--  nest/protocol.h        |   9
-rw-r--r--  nest/route.h           | 108
-rw-r--r--  nest/rt-attr.c         |  11
-rw-r--r--  nest/rt-show.c         |  29
-rw-r--r--  nest/rt-table.c        | 395
-rw-r--r--  proto/bgp/attrs.c      |   2
-rw-r--r--  proto/bgp/bgp.c        |  12
-rw-r--r--  proto/bgp/bgp.h        |   2
-rw-r--r--  proto/mrt/mrt.c        |  59
-rw-r--r--  proto/mrt/mrt.h        |   6
-rw-r--r--  proto/perf/perf.c      |   4
-rw-r--r--  proto/radv/radv.c      |   5
-rw-r--r--  proto/static/static.c  |  12
-rw-r--r--  sysdep/unix/krt.c      |   9
16 files changed, 502 insertions(+), 240 deletions(-)
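
Two orientation sketches before the diff itself; neither is part of the patch, and the demo_* names are hypothetical, invented here for illustration.

First, nest/route.h below splits struct rtable into a shared public head (the RTABLE_PUBLIC macro) and an rtable_private remainder that may only be touched while the table's own birdloop is entered; the RT_LOCK()/RT_UNLOCK()/RT_PRIV()/RT_LOCKED() macros enforce this. A minimal sketch of the resulting calling convention, assuming only what the macros in this patch define:

    /* demo_route_count() is a hypothetical caller, not part of the patch. */
    static u32
    demo_route_count(rtable *pub)
    {
      u32 n;

      /* Block form: the private view is valid only inside the body. */
      RT_LOCKED(pub, tab)
        n = tab->rt_count;              /* private field, needs the loop entered */

      /* Explicit form of the same thing, as used e.g. in rt-show.c. */
      rtable_private *priv = RT_LOCK(pub);   /* birdloop_enter() + private view */
      n = priv->rt_count;
      RT_UNLOCK(pub);                        /* birdloop_leave() */

      return n;
    }

This is also why rt_lock_table(), net_find() and friends change their argument type to rtable_private * in this patch: holding the private pointer proves the caller went through RT_LOCK() first.

Second, the extra asynchronicity in cleanup: rt_stop_import() no longer takes a callback function but a struct event *, which the table loop posts to the requester's own event list once the import is fully flushed (ev_send(ih->stopped->list, ih->stopped) in rt_export_cleanup() below). Condensed from the channel_do_stop() hunk of this patch:

    /* Stop import: the "stopped" notification becomes an event that the
     * table loop sends back to the protocol's event list. */
    if (c->in_req.hook)
    {
      c->in_stopped = (event) {
        .hook = channel_import_stopped,     /* runs in the protocol's context */
        .data = c,
        .list = proto_event_list(c->proto),
      };
      rt_stop_import(&c->in_req, &c->in_stopped);
    }
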
diff --git a/filter/f-inst.c b/filter/f-inst.c index 706eb684..0341a2f1 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -1212,7 +1212,7 @@ INST(FI_ROA_CHECK_IMPLICIT, 0, 1) { /* ROA Check */ NEVER_CONSTANT; RTC(1); - struct rtable *table = rtc->table; + rtable *table = rtc->table; ACCESS_RTE; ACCESS_EATTRS; const net_addr *net = fs->rte->net; @@ -1244,7 +1244,7 @@ ARG(1, T_NET); ARG(2, T_INT); RTC(3); - struct rtable *table = rtc->table; + rtable *table = rtc->table; u32 as = v2.val.i; diff --git a/nest/proto.c b/nest/proto.c index 35af3c6c..4ae0cbfd 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -172,7 +172,7 @@ proto_cf_find_channel(struct proto_config *pc, uint net_type) * Returns pointer to channel or NULL */ struct channel * -proto_find_channel_by_table(struct proto *p, struct rtable *t) +proto_find_channel_by_table(struct proto *p, rtable *t) { struct channel *c; @@ -236,7 +236,9 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->channel = cf->channel; c->proto = p; c->table = cf->table->table; - rt_lock_table(c->table); + + RT_LOCKED(c->table, t) + rt_lock_table(t); c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; @@ -277,7 +279,9 @@ proto_remove_channel(struct proto *p UNUSED, struct channel *c) CD(c, "Removed", c->name); - rt_unlock_table(c->table); + RT_LOCKED(c->table, t) + rt_unlock_table(t); + rem_node(&c->n); mb_free(c); } @@ -391,7 +395,7 @@ static void channel_roa_subscribe_filter(struct channel *c, int dir) { const struct filter *f = dir ? c->in_filter : c->out_filter; - struct rtable *tab; + rtable *tab; int valid = 1, found = 0; if ((f == FILTER_ACCEPT) || (f == FILTER_REJECT)) @@ -560,11 +564,11 @@ channel_check_stopped(struct channel *c) } void -channel_import_stopped(struct rt_import_request *req) +channel_import_stopped(void *_c) { - struct channel *c = SKIP_BACK(struct channel, in_req, req); + struct channel *c = _c; - req->hook = NULL; + c->in_req.hook = NULL; mb_free(c->in_req.name); c->in_req.name = NULL; @@ -661,17 +665,16 @@ channel_aux_stopped(void *data) else c->in_table = NULL; - rfree(cat->tab->rp); - + rfree(cat->tab->priv.rp); mb_free(cat); - return channel_check_stopped(c); + channel_check_stopped(c); } static void -channel_aux_import_stopped(struct rt_import_request *req) +channel_aux_import_stopped(void *_cat) { - struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, push, req); - ASSERT_DIE(cat->tab->delete_event); + struct channel_aux_table *cat = _cat; + cat->push.hook = NULL; } static void @@ -680,24 +683,35 @@ channel_aux_export_stopped(struct rt_export_request *req) struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req); req->hook = NULL; - if (cat->refeed_pending && !cat->tab->delete_event) - { - cat->refeed_pending = 0; - rt_request_export(cat->tab, req); - } - else - ASSERT_DIE(cat->tab->delete_event); + int del; + RT_LOCKED(cat->tab, t) + del = !!t->delete_event; + + if (del) + return; + + ASSERT_DIE(cat->refeed_pending); + cat->refeed_pending = 0; + rt_request_export(cat->tab, req); } static void channel_aux_stop(struct channel_aux_table *cat) { - rt_stop_import(&cat->push, channel_aux_import_stopped); - rt_stop_export(&cat->get, channel_aux_export_stopped); + RT_LOCKED(cat->tab, t) + { + t->delete_event = ev_new_init(t->rp, channel_aux_stopped, cat); + t->delete_event->list = proto_event_list(cat->c->proto); + } - cat->tab->delete_event = ev_new_init(cat->tab->rp, channel_aux_stopped, cat); + cat->push_stopped = (event) { + .hook = channel_aux_import_stopped, + .data 
= cat, + .list = proto_event_list(cat->c->proto), + }; - rt_unlock_table(cat->tab); + rt_stop_import(&cat->push, &cat->push_stopped); + rt_stop_export(&cat->get, channel_aux_export_stopped); } static void @@ -889,7 +903,6 @@ channel_setup_in_table(struct channel *c, int best) c->in_table->c = c; c->in_table->tab = rt_setup(c->proto->pool, &cat->tab_cf); - rt_lock_table(c->in_table->tab); rt_request_import(c->in_table->tab, &c->in_table->push); rt_request_export(c->in_table->tab, &c->in_table->get); @@ -931,7 +944,6 @@ channel_setup_out_table(struct channel *c) c->out_table->c = c; c->out_table->tab = rt_setup(c->proto->pool, &cat->tab_cf); - rt_lock_table(c->out_table->tab); rt_request_import(c->out_table->tab, &c->out_table->push); rt_request_export(c->out_table->tab, &c->out_table->get); @@ -993,7 +1005,14 @@ channel_do_stop(struct channel *c) /* Stop import */ if (c->in_req.hook) - rt_stop_import(&c->in_req, channel_import_stopped); + { + c->in_stopped = (event) { + .hook = channel_import_stopped, + .data = c, + .list = proto_event_list(c->proto), + }; + rt_stop_import(&c->in_req, &c->in_stopped); + } c->gr_wait = 0; if (c->gr_lock) @@ -2339,7 +2358,7 @@ proto_do_start(struct proto *p) { p->active = 1; - rt_init_sources(&p->sources, p->name, proto_event_list(p)); + rt_init_sources(&p->sources, p->name, proto_work_list(p)); if (!p->sources.class) p->sources.class = &default_rte_owner_class; diff --git a/nest/protocol.h b/nest/protocol.h index 1647fbba..8d077e44 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -18,7 +18,6 @@ struct iface; struct ifa; -struct rtable; struct rte; struct neighbor; struct rta; @@ -207,7 +206,7 @@ struct proto { * rte_remove Called whenever a rte is removed from the routing table. */ - int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); + int (*rte_recalculate)(rtable *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_better)(struct rte *, struct rte *); int (*rte_mergable)(struct rte *, struct rte *); void (*rte_insert)(struct network *, struct rte *); @@ -496,7 +495,7 @@ struct channel { const struct channel_class *channel; struct proto *proto; - struct rtable *table; + rtable *table; const struct filter *in_filter; /* Input filter */ const struct filter *out_filter; /* Output filter */ struct bmap export_map; /* Keeps track which routes were really exported */ @@ -556,6 +555,7 @@ struct channel { btime last_state_change; /* Time of last state transition */ struct channel_aux_table *in_table; /* Internal table for received routes */ + struct event in_stopped; /* Import stop callback */ u8 reload_pending; /* Reloading and another reload is scheduled */ u8 refeed_pending; /* Refeeding and another refeed is scheduled */ @@ -570,6 +570,7 @@ struct channel_aux_table { struct channel *c; struct rt_import_request push; struct rt_export_request get; + event push_stopped; rtable *tab; event *stop; u8 refeed_pending; @@ -633,7 +634,7 @@ struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_ty static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) { return proto_cf_find_channel(pc, pc->net_type); } -struct channel *proto_find_channel_by_table(struct proto *p, struct rtable *t); +struct channel *proto_find_channel_by_table(struct proto *p, rtable *t); struct channel *proto_find_channel_by_name(struct proto *p, const char *n); struct channel *proto_add_channel(struct proto *p, struct channel_config *cf); int proto_configure_channel(struct 
proto *p, struct channel **c, struct channel_config *cf); diff --git a/nest/route.h b/nest/route.h index 683c966e..9417d97d 100644 --- a/nest/route.h +++ b/nest/route.h @@ -146,30 +146,21 @@ void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src) * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. */ -struct rtable_config { - node n; - char *name; - struct config *config; - struct rtable *table; - struct proto_config *krt_attached; /* Kernel syncer attached to this table */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - int gc_max_ops; /* Maximum number of operations before GC is run */ - int gc_min_time; /* Minimum time between two consecutive GC runs */ - byte sorted; /* Routes of network are sorted according to rte_better() */ - btime min_settle_time; /* Minimum settle time for notifications */ - btime max_settle_time; /* Maximum settle time for notifications */ - btime export_settle_time; /* Delay before exports are announced */ - uint cork_limit; /* Amount of routes to be pending on export to cork imports */ -}; - -typedef struct rtable { - resource r; - node n; /* Node in list of all tables */ +typedef struct rtable_private { +#define RTABLE_PUBLIC \ + resource r; \ + node n; /* Node in list of all tables */ \ + struct birdloop *loop; /* This loop runs the table */ \ + char *name; /* Name of this table */ \ + uint addr_type; /* Type of address data stored in table (NET_*) */ \ + struct rtable_config *config; /* Configuration of this table */ \ + struct event *nhu_event; /* Event to update next hops */ \ + _Atomic byte nhu_state; /* Next Hop Update state */ \ + + RTABLE_PUBLIC; pool *rp; /* Resource pool to allocate everything from, including itself */ struct slab *rte_slab; /* Slab to allocate route objects */ struct fib fib; - char *name; /* Name of this table */ - uint addr_type; /* Type of address data stored in table (NET_*) */ int use_count; /* Number of protocols using this table */ u32 rt_count; /* Number of routes in the table */ @@ -178,18 +169,15 @@ typedef struct rtable { struct hmap id_map; struct hostcache *hostcache; - struct rtable_config *config; /* Configuration of this table */ struct event *prune_event; /* Event to prune abandoned routes */ struct event *ec_event; /* Event to prune finished exports */ struct event *hcu_event; /* Event to update host cache */ - struct event *nhu_event; /* Event to update next hops */ struct event *delete_event; /* Event to delete the table */ btime last_rt_change; /* Last time when route changed */ btime base_settle_time; /* Start time of rtable settling interval */ btime gc_time; /* Time of last GC */ int gc_counter; /* Number of operations since last GC */ byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ - byte nhu_state; /* Next Hop Update state */ byte cork_active; /* Congestion control activated */ @@ -208,8 +196,35 @@ typedef struct rtable { struct rt_pending_export *first_export; /* First export to announce */ u64 next_export_seq; /* The next export will have this ID */ +} rtable_private; + +typedef union { + struct { RTABLE_PUBLIC }; + rtable_private priv; } rtable; +#define RT_LOCK(tab) ({ birdloop_enter((tab)->loop); &(tab)->priv; }) +#define RT_UNLOCK(tab) birdloop_leave((tab)->loop) +#define RT_PRIV(tab) ({ ASSERT_DIE(birdloop_inside((tab)->loop)); &(tab)->priv; }) + +#define RT_LOCKED(tpub, tpriv) for (rtable_private *tpriv = RT_LOCK(tpub); tpriv; RT_UNLOCK(tpriv), (tpriv = NULL)) + +struct rtable_config { + node n; + char 
*name; + struct config *config; + rtable *table; + struct proto_config *krt_attached; /* Kernel syncer attached to this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + int gc_max_ops; /* Maximum number of operations before GC is run */ + int gc_min_time; /* Minimum time between two consecutive GC runs */ + byte sorted; /* Routes of network are sorted according to rte_better() */ + btime min_settle_time; /* Minimum settle time for notifications */ + btime max_settle_time; /* Maximum settle time for notifications */ + btime export_settle_time; /* Delay before exports are announced */ + uint cork_limit; /* Amount of routes to be pending on export to cork imports */ +}; + struct rt_subscription { node n; rtable *tab; @@ -244,7 +259,7 @@ struct hostentry { ip_addr addr; /* IP address of host, part of key */ ip_addr link; /* (link-local) IP address of host, used as gw if host is directly attached */ - struct rtable *tab; /* Dependent table, part of key */ + rtable *tab; /* Dependent table, part of key */ struct hostentry *next; /* Next in hash chain */ unsigned hash_key; /* Hash key */ unsigned uc; /* Use count */ @@ -324,7 +339,7 @@ struct rt_import_hook { u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */ u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ - void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ + struct event *stopped; /* Event to run when import is stopped */ }; struct rt_pending_export { @@ -405,7 +420,7 @@ extern struct event_cork rt_cork; void rt_request_import(rtable *tab, struct rt_import_request *req); void rt_request_export(rtable *tab, struct rt_export_request *req); -void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *)); +void rt_stop_import(struct rt_import_request *, struct event *stopped); void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *)); const char *rt_import_state_name(u8 state); @@ -480,27 +495,27 @@ struct config; void rt_init(void); void rt_preconfig(struct config *); void rt_commit(struct config *new, struct config *old); -void rt_lock_table(rtable *); -void rt_unlock_table(rtable *); +void rt_lock_table(rtable_private *); +void rt_unlock_table(rtable_private *); void rt_subscribe(rtable *tab, struct rt_subscription *s); void rt_unsubscribe(struct rt_subscription *s); rtable *rt_setup(pool *, struct rtable_config *); -static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } -static inline net *net_find_valid(rtable *tab, const net_addr *addr) +static inline net *net_find(rtable_private *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_find_valid(rtable_private *tab, const net_addr *addr) { net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? 
n : NULL; } -static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } -void *net_route(rtable *tab, const net_addr *n); +static inline net *net_get(rtable_private *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +void *net_route(rtable_private *tab, const net_addr *n); int net_roa_check(rtable *tab, const net_addr *n, u32 asn); -int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); +int rt_examine(rtable_private *t, net_addr *a, struct channel *c, const struct filter *filter); rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); void rt_refresh_begin(struct rt_import_request *); void rt_refresh_end(struct rt_import_request *); -void rt_schedule_prune(rtable *t); +void rt_schedule_prune(rtable_private *t); void rte_dump(struct rte_storage *); -void rte_free(struct rte_storage *, rtable *); -struct rte_storage *rte_store(const rte *, net *net, rtable *); +void rte_free(struct rte_storage *, rtable_private *); +struct rte_storage *rte_store(const rte *, net *net, rtable_private *); void rt_dump(rtable *); void rt_dump_all(void); void rt_dump_hooks(rtable *); @@ -591,7 +606,7 @@ struct rte_src { typedef struct rta { struct rta *next, **pprev; /* Hash chain */ - _Atomic u32 uc; /* Use count */ + u32 uc; /* Use count */ u32 hash_key; /* Hash over important fields */ struct ea_list *eattrs; /* Extended Attribute chain */ struct hostentry *hostentry; /* Hostentry for recursive next-hops */ @@ -732,7 +747,7 @@ struct rte_owner_class { struct rte_owner { struct rte_owner_class *class; - int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); + int (*rte_recalculate)(rtable_private *, struct network *, struct rte *, struct rte *, struct rte *); HASH(struct rte_src) hash; const char *name; u32 hash_key; @@ -863,9 +878,20 @@ static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a #define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ static inline int rta_is_cached(rta *r) { return r->cached; } -static inline rta *rta_clone(rta *r) { ASSERT_DIE(0 < atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel)); return r; } +static inline rta *rta_clone(rta *r) { + RTA_LOCK; + r->uc++; + RTA_UNLOCK; + return r; +} + void rta__free(rta *r); -static inline void rta_free(rta *r) { if (r && (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel))) rta__free(r); } +static inline void rta_free(rta *r) { + RTA_LOCK; + if (r && !--r->uc) + rta__free(r); + RTA_UNLOCK; +} rta *rta_do_cow(rta *o, linpool *lp); static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; } static inline void rta_uncache(rta *r) { r->cached = 0; r->uc = 0; } diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 20f9835d..cd4c6892 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -1287,7 +1287,7 @@ rta_lookup(rta *o) for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next) if (r->hash_key == h && rta_same(r, o)) { - atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel); + r->uc++; RTA_UNLOCK; return r; } @@ -1308,14 +1308,6 @@ rta_lookup(rta *o) void rta__free(rta *a) { - RTA_LOCK; - if (atomic_load_explicit(&a->uc, memory_order_acquire)) - { - /* Somebody has cloned this rta inbetween. This sometimes happens. 
*/ - RTA_UNLOCK; - return; - } - ASSERT(rta_cache_count && a->cached); rta_cache_count--; *a->pprev = a->next; @@ -1327,7 +1319,6 @@ rta__free(rta *a) ea_free(a->eattrs); a->cached = 0; sl_free(rta_slab(a), a); - RTA_UNLOCK; } rta * diff --git a/nest/rt-show.c b/nest/rt-show.c index 8196903d..65b59af4 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -239,11 +239,13 @@ rt_show_cleanup(struct cli *c) /* Unlink the iterator */ if (d->table_open) - fit_get(&d->tab->table->fib, &d->fit); + RT_LOCKED(d->tab->table, t) + fit_get(&t->fib, &d->fit); /* Unlock referenced tables */ WALK_LIST(tab, d->tables) - rt_unlock_table(tab->table); + RT_LOCKED(tab->table, t) + rt_unlock_table(t); } static void @@ -255,8 +257,6 @@ rt_show_cont(struct cli *c) #else unsigned max = 64; #endif - struct fib *fib = &d->tab->table->fib; - struct fib_iterator *it = &d->fit; if (d->running_on_config && (d->running_on_config != config)) { @@ -264,9 +264,14 @@ rt_show_cont(struct cli *c) goto done; } + rtable_private *t = RT_LOCK(d->tab->table); + + struct fib *fib = &t->fib; + struct fib_iterator *it = &d->fit; + if (!d->table_open) { - FIB_ITERATE_INIT(&d->fit, &d->tab->table->fib); + FIB_ITERATE_INIT(&d->fit, fib); d->table_open = 1; d->table_counter++; d->kernel = rt_show_get_kernel(d); @@ -284,6 +289,7 @@ rt_show_cont(struct cli *c) if (!max--) { FIB_ITERATE_PUT(it); + RT_UNLOCK(d->tab->table); return; } rt_show_net(c, n, d); @@ -300,6 +306,8 @@ rt_show_cont(struct cli *c) d->net_counter - d->net_counter_last, d->tab->table->name); } + RT_UNLOCK(d->tab->table); + d->kernel = NULL; d->table_open = 0; d->tab = NODE_NEXT(d->tab); @@ -431,7 +439,8 @@ rt_show(struct rt_show_data *d) if (!d->addr) { WALK_LIST(tab, d->tables) - rt_lock_table(tab->table); + RT_LOCKED(tab->table, t) + rt_lock_table(t); /* There is at least one table */ d->tab = HEAD(d->tables); @@ -446,13 +455,17 @@ rt_show(struct rt_show_data *d) d->tab = tab; d->kernel = rt_show_get_kernel(d); + RT_LOCK(tab->table); + if (d->show_for) - n = net_route(tab->table, d->addr); + n = net_route(RT_PRIV(tab->table), d->addr); else - n = net_find(tab->table, d->addr); + n = net_find(RT_PRIV(tab->table), d->addr); if (n) rt_show_net(this_cli, n, d); + + RT_UNLOCK(tab->table); } if (d->rt_counter) diff --git a/nest/rt-table.c b/nest/rt-table.c index fb0496bd..f304372f 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -61,15 +61,15 @@ struct rt_export_block { struct rt_pending_export export[]; }; -static void rt_free_hostcache(rtable *tab); -static void rt_notify_hostcache(rtable *tab, net *net); +static void rt_free_hostcache(rtable_private *tab); +static void rt_notify_hostcache(rtable_private *tab, net *net); static void rt_update_hostcache(void *tab); static void rt_next_hop_update(void *tab); static inline void rt_prune_table(void *tab); -static inline void rt_schedule_notify(rtable *tab); +static inline void rt_schedule_notify(rtable_private *tab); static void rt_feed_channel(void *); -static inline void rt_export_used(rtable *tab); +static inline void rt_export_used(rtable_private *tab); static void rt_export_cleanup(void *tab); const char *rt_import_state_name_array[TIS_MAX] = { @@ -122,7 +122,7 @@ rte_update_unlock(struct channel *c) /* Like fib_route(), but skips empty net entries */ static inline void * -net_route_ip4(rtable *t, net_addr_ip4 *n) +net_route_ip4(rtable_private *t, net_addr_ip4 *n) { net *r; @@ -136,7 +136,7 @@ net_route_ip4(rtable *t, net_addr_ip4 *n) } static inline void * -net_route_ip6(rtable *t, net_addr_ip6 *n) 
+net_route_ip6(rtable_private *t, net_addr_ip6 *n) { net *r; @@ -150,7 +150,7 @@ net_route_ip6(rtable *t, net_addr_ip6 *n) } static inline void * -net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) +net_route_ip6_sadr(rtable_private *t, net_addr_ip6_sadr *n) { struct fib_node *fn; @@ -189,7 +189,7 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) } void * -net_route(rtable *tab, const net_addr *n) +net_route(rtable_private *tab, const net_addr *n) { ASSERT(tab->addr_type == n->type); @@ -218,12 +218,15 @@ net_route(rtable *tab, const net_addr *n) static int -net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) +net_roa_check_ip4(rtable *t, const net_addr_ip4 *px, u32 asn) { struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; int anything = 0; + RT_LOCK(t); + rtable_private *tab = RT_PRIV(t); + while (1) { for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next) @@ -235,7 +238,10 @@ net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + { + RT_UNLOCK(tab); return ROA_VALID; + } } } @@ -246,16 +252,20 @@ net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) ip4_clrbit(&n.prefix, n.pxlen); } + RT_UNLOCK(tab); return anything ? ROA_INVALID : ROA_UNKNOWN; } static int -net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) +net_roa_check_ip6(rtable *t, const net_addr_ip6 *px, u32 asn) { struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; int anything = 0; + RT_LOCK(t); + rtable_private *tab = RT_PRIV(t); + while (1) { for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next) @@ -267,7 +277,10 @@ net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + { + RT_UNLOCK(tab); return ROA_VALID; + } } } @@ -278,6 +291,7 @@ net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) ip6_clrbit(&n.prefix, n.pxlen); } + RT_UNLOCK(tab); return anything ? 
ROA_INVALID : ROA_UNKNOWN; } @@ -328,7 +342,7 @@ rte_find(net *net, struct rte_src *src) struct rte_storage * -rte_store(const rte *r, net *net, rtable *tab) +rte_store(const rte *r, net *net, rtable_private *tab) { struct rte_storage *e = sl_alloc(tab->rte_slab); @@ -354,7 +368,7 @@ rte_store(const rte *r, net *net, rtable *tab) */ void -rte_free(struct rte_storage *e, rtable *tab) +rte_free(struct rte_storage *e, rtable_private *tab) { rt_unlock_source(e->rte.src); rta_free(e->rte.attrs); @@ -912,6 +926,7 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) hook->req->export_one(hook->req, n, rpe); else if (hook->req->export_bulk) { + RT_LOCK(hook->table); net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); uint count = rte_feed_count(net); rte **feed = NULL; @@ -920,6 +935,7 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) feed = alloca(count * sizeof(rte *)); rte_feed_obtain(net, feed, count); } + RT_UNLOCK(hook->table); hook->req->export_bulk(hook->req, n, rpe, feed, count); } else @@ -931,7 +947,11 @@ seen: /* The last block may be available to free */ if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe)) - rt_export_used(hook->table); + { + RT_LOCK(hook->table); + rt_export_used(RT_PRIV(hook->table)); + RT_UNLOCK(hook->table); + } /* Releasing this export for cleanup routine */ DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq); @@ -970,7 +990,7 @@ seen: * done outside of scope of rte_announce(). */ static void -rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, +rte_announce(rtable_private *tab, net *net, struct rte_storage *new, struct rte_storage *old, struct rte_storage *new_best, struct rte_storage *old_best) { if (!new_best || !rte_is_valid(&new_best->rte)) @@ -1085,10 +1105,10 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage { ev_cork(&rt_cork); tab->cork_active = 1; - tm_start(tab->export_timer, 0); + tm_start_in(tab->export_timer, 0, tab->loop); } else if (!tm_active(tab->export_timer)) - tm_start(tab->export_timer, tab->config->export_settle_time); + tm_start_in(tab->export_timer, tab->config->export_settle_time, tab->loop); } static struct rt_pending_export * @@ -1117,7 +1137,7 @@ rt_next_export_fast(struct rt_pending_export *last) } static struct rt_pending_export * -rt_next_export(struct rt_export_hook *hook, rtable *tab) +rt_next_export(struct rt_export_hook *hook, rtable_private *tab) { /* As the table is locked, it is safe to reload the last export pointer */ struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire); @@ -1140,7 +1160,8 @@ rt_send_export_event(struct rt_export_hook *hook) static void rt_announce_exports(timer *tm) { - rtable *tab = tm->data; + rtable_private *tab = tm->data; + ASSERT_DIE(birdloop_inside(tab->loop)); struct rt_export_hook *c; node *n; WALK_LIST2(c, n, tab->exports, n) @@ -1153,7 +1174,7 @@ rt_announce_exports(timer *tm) } static struct rt_pending_export * -rt_last_export(rtable *tab) +rt_last_export(rtable_private *tab) { struct rt_pending_export *rpe = NULL; @@ -1179,13 +1200,17 @@ rt_export_hook(void *_data) if (!c->rpe_next) { - c->rpe_next = rt_next_export(c, c->table); + RT_LOCK(c->table); + c->rpe_next = rt_next_export(c, RT_PRIV(c->table)); if (!c->rpe_next) { - rt_export_used(c->table); + rt_export_used(RT_PRIV(c->table)); + RT_UNLOCK(c->table); return; } + + RT_UNLOCK(c->table); } /* Process the export */ @@ -1255,10 +1280,9 @@ rte_same(rte *x, rte 
*y) static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } static void -rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) +rte_recalculate(rtable_private *table, struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) { struct rt_import_request *req = c->req; - struct rtable *table = c->table; struct rt_import_stats *stats = &c->stats; struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; @@ -1521,7 +1545,6 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) ASSERT(c->channel_state == CS_UP); - if (c->in_table) rte_import(&c->in_table->push, n, new, src); else @@ -1575,27 +1598,32 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt if (!hook) return; + RT_LOCK(hook->table); + rtable_private *tab = RT_PRIV(hook->table); + net *nn; if (new) { /* Use the actual struct network, not the dummy one */ - nn = net_get(hook->table, n); + nn = net_get(tab, n); new->net = nn->n.addr; new->sender = hook; } - else if (!(nn = net_find(hook->table, n))) + else if (!(nn = net_find(tab, n))) { req->hook->stats.withdraws_ignored++; + RT_UNLOCK(tab); return; } /* And recalculate the best route */ - rte_recalculate(hook, nn, new, src); + rte_recalculate(tab, hook, nn, new, src); + RT_UNLOCK(tab); } /* Check rtable for best route to given net whether it would be exported do p */ int -rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) +rt_examine(rtable_private *t, net_addr *a, struct channel *c, const struct filter *filter) { net *n = net_find(t, a); @@ -1623,22 +1651,27 @@ static void rt_export_stopped(void *data) { struct rt_export_hook *hook = data; - rtable *tab = hook->table; - /* Drop pending exports */ - rt_export_used(tab); + RT_LOCKED(hook->table, tab) + { + /* Drop pending exports */ + rt_export_used(tab); - /* Unlist */ - rem_node(&hook->n); + /* Unlist */ + rem_node(&hook->n); + } /* Report the channel as stopped. */ hook->stopped(hook->req); - /* Free the hook together with its coroutine. */ - rfree(hook->pool); - rt_unlock_table(tab); + RT_LOCKED(hook->table, tab) + { + /* Free the hook together with its coroutine. 
*/ + rfree(hook->pool); + rt_unlock_table(tab); - DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); + } } @@ -1663,8 +1696,10 @@ rt_set_export_state(struct rt_export_hook *hook, u8 state) } void -rt_request_import(rtable *tab, struct rt_import_request *req) +rt_request_import(rtable *t, struct rt_import_request *req) { + RT_LOCK(t); + rtable_private *tab = RT_PRIV(t); rt_lock_table(tab); struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); @@ -1672,7 +1707,7 @@ rt_request_import(rtable *tab, struct rt_import_request *req) DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); hook->req = req; - hook->table = tab; + hook->table = t; if (!hook->stale_set) hook->stale_set = hook->stale_valid = hook->stale_pruning = hook->stale_pruned = 1; @@ -1681,24 +1716,30 @@ rt_request_import(rtable *tab, struct rt_import_request *req) hook->n = (node) {}; add_tail(&tab->imports, &hook->n); + + RT_UNLOCK(t); } void -rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_request *)) +rt_stop_import(struct rt_import_request *req, event *stopped) { ASSERT_DIE(req->hook); struct rt_import_hook *hook = req->hook; - rt_schedule_prune(hook->table); + RT_LOCK(hook->table); + rt_schedule_prune(RT_PRIV(hook->table)); rt_set_import_state(hook, TIS_STOP); hook->stopped = stopped; + RT_UNLOCK(hook->table); } void -rt_request_export(rtable *tab, struct rt_export_request *req) +rt_request_export(rtable *t, struct rt_export_request *req) { + RT_LOCK(t); + rtable_private *tab = RT_PRIV(t); rt_lock_table(tab); pool *p = rp_new(tab->rp, "Export hook"); @@ -1706,7 +1747,7 @@ rt_request_export(rtable *tab, struct rt_export_request *req) hook->pool = p; hook->req = req; - hook->table = tab; + hook->table = t; /* stats zeroed by mb_allocz */ @@ -1714,7 +1755,7 @@ rt_request_export(rtable *tab, struct rt_export_request *req) rt_set_export_state(hook, TES_HUNGRY); - struct rt_pending_export *rpe = rt_last_export(hook->table); + struct rt_pending_export *rpe = rt_last_export(tab); DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? 
rpe->seq : 0); atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); @@ -1726,9 +1767,11 @@ rt_request_export(rtable *tab, struct rt_export_request *req) DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); hook->event = ev_new_init(p, rt_feed_channel, hook); - rt_send_export_event(hook); + RT_UNLOCK(t); rt_set_export_state(hook, TES_FEEDING); + ASSERT_DIE(hook->export_state == TES_FEEDING); + rt_send_export_event(hook); } void @@ -1737,7 +1780,8 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r ASSERT_DIE(req->hook); struct rt_export_hook *hook = req->hook; - rtable *tab = hook->table; + RT_LOCK(hook->table); + rtable_private *tab = RT_PRIV(hook->table); /* Stop feeding */ ev_postpone(hook->event); @@ -1750,10 +1794,11 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r rt_send_export_event(hook); + RT_UNLOCK(hook->table); + rt_set_export_state(hook, TES_STOP); } - /** * rt_refresh_begin - start a refresh cycle * @t: related routing table @@ -1772,14 +1817,17 @@ rt_refresh_begin(struct rt_import_request *req) struct rt_import_hook *hook = req->hook; ASSERT_DIE(hook); + RT_LOCK(hook->table); + rtable_private *tab = RT_PRIV(hook->table); + ASSERT_DIE(hook->stale_set == hook->stale_valid); /* If the pruning routine is too slow */ if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid) || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128)) { - log(L_WARN "Route refresh flood in table %s", hook->table->name); - FIB_WALK(&hook->table->fib, net, n) + log(L_WARN "Route refresh flood in table %s", tab->name); + FIB_WALK(&tab->fib, net, n) { for (struct rte_storage *e = n->routes; e; e = e->next) if (e->rte.sender == req->hook) @@ -1799,6 +1847,8 @@ rt_refresh_begin(struct rt_import_request *req) if (req->trace_routes & D_STATES) log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set); + + RT_UNLOCK(tab); } /** @@ -1815,13 +1865,16 @@ rt_refresh_end(struct rt_import_request *req) struct rt_import_hook *hook = req->hook; ASSERT_DIE(hook); + RT_LOCK(hook->table); hook->stale_valid++; ASSERT_DIE(hook->stale_set == hook->stale_valid); - rt_schedule_prune(hook->table); + rt_schedule_prune(RT_PRIV(hook->table)); if (req->trace_routes & D_STATES) log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); + + RT_UNLOCK(hook->table); } /** @@ -1846,8 +1899,10 @@ rte_dump(struct rte_storage *e) * This function dumps contents of a given routing table to debug output. */ void -rt_dump(rtable *t) +rt_dump(rtable *tab) { + RT_LOCK(tab); + rtable_private *t = RT_PRIV(tab); debug("Dump of routing table <%s>%s\n", t->name, t->delete_event ? " (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); @@ -1859,6 +1914,7 @@ rt_dump(rtable *t) } FIB_WALK_END; debug("\n"); + RT_UNLOCK(tab); } /** @@ -1877,11 +1933,13 @@ rt_dump_all(void) } void -rt_dump_hooks(rtable *tab) +rt_dump_hooks(rtable *t) { + RT_LOCK(t); + rtable_private *tab = RT_PRIV(t); debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->delete_event ? 
" (deleted)" : ""); debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", - tab->nhu_state, ev_active(tab->hcu_event), tab->use_count, tab->rt_count); + atomic_load(&tab->nhu_state), ev_active(tab->hcu_event), tab->use_count, tab->rt_count); debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); @@ -1904,6 +1962,7 @@ rt_dump_hooks(rtable *tab) eh, eh->req, eh->refeed_pending, eh->last_state_change, atomic_load_explicit(&eh->export_state, memory_order_relaxed)); } debug("\n"); + RT_UNLOCK(t); } void @@ -1921,37 +1980,36 @@ rt_dump_hooks_all(void) static inline void rt_schedule_nhu(rtable *tab) { - if (tab->nhu_state == NHU_CLEAN) - ev_schedule(tab->nhu_event); + atomic_fetch_or_explicit(&tab->nhu_state, NHU_SCHEDULED, memory_order_acq_rel); + ev_send_loop(tab->loop, tab->nhu_event); /* state change: * NHU_CLEAN -> NHU_SCHEDULED * NHU_RUNNING -> NHU_DIRTY */ - tab->nhu_state |= NHU_SCHEDULED; } void -rt_schedule_prune(rtable *tab) +rt_schedule_prune(rtable_private *tab) { if (tab->prune_state == 0) - ev_schedule(tab->prune_event); + ev_send_loop(tab->loop, tab->prune_event); /* state change 0->1, 2->3 */ tab->prune_state |= 1; } void -rt_export_used(rtable *tab) +rt_export_used(rtable_private *tab) { if (config->table_debug) log(L_TRACE "%s: Export cleanup requested", tab->name); - ev_schedule(tab->ec_event); + ev_send_loop(tab->loop, tab->ec_event); } static inline btime -rt_settled_time(rtable *tab) +rt_settled_time(rtable_private *tab) { ASSUME(tab->base_settle_time != 0); @@ -1962,7 +2020,8 @@ rt_settled_time(rtable *tab) static void rt_settle_timer(timer *t) { - rtable *tab = t->data; + rtable_private *tab = t->data; + ASSERT_DIE(birdloop_inside(tab->loop)); if (!tab->base_settle_time) return; @@ -1970,7 +2029,7 @@ rt_settle_timer(timer *t) btime settled_time = rt_settled_time(tab); if (current_time() < settled_time) { - tm_set(tab->settle_timer, settled_time); + tm_set_in(tab->settle_timer, settled_time, tab->loop); return; } @@ -1983,7 +2042,7 @@ rt_settle_timer(timer *t) } static void -rt_kick_settle_timer(rtable *tab) +rt_kick_settle_timer(rtable_private *tab) { tab->base_settle_time = current_time(); @@ -1991,11 +2050,11 @@ rt_kick_settle_timer(rtable *tab) tab->settle_timer = tm_new_init(tab->rp, rt_settle_timer, tab, 0, 0); if (!tm_active(tab->settle_timer)) - tm_set(tab->settle_timer, rt_settled_time(tab)); + tm_set_in(tab->settle_timer, rt_settled_time(tab), tab->loop); } static inline void -rt_schedule_notify(rtable *tab) +rt_schedule_notify(rtable_private *tab) { if (EMPTY_LIST(tab->subscribers)) return; @@ -2007,25 +2066,33 @@ rt_schedule_notify(rtable *tab) } void -rt_subscribe(rtable *tab, struct rt_subscription *s) +rt_subscribe(rtable *t, struct rt_subscription *s) { - s->tab = tab; - rt_lock_table(tab); - DBG("rt_subscribe(%s)\n", tab->name); - add_tail(&tab->subscribers, &s->n); + s->tab = t; + RT_LOCKED(t, tab) + { + rt_lock_table(tab); + DBG("rt_subscribe(%s)\n", tab->name); + add_tail(&tab->subscribers, &s->n); + } } void rt_unsubscribe(struct rt_subscription *s) { - rem_node(&s->n); - rt_unlock_table(s->tab); + RT_LOCKED(s->tab, tab) + { + rem_node(&s->n); + if (EMPTY_LIST(tab->subscribers) && tm_active(tab->settle_timer)) + tm_stop(tab->settle_timer); + rt_unlock_table(tab); + } } static void rt_free(resource *_r) { - rtable *r = (rtable *) _r; + rtable_private *r = (rtable_private *) _r; DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); @@ 
-2046,14 +2113,14 @@ rt_free(resource *_r) static void rt_res_dump(resource *_r) { - rtable *r = (rtable *) _r; + RT_LOCKED((rtable *) _r, r) debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n", r->name, net_label[r->addr_type], r->rt_count, r->use_count); } static struct resclass rt_class = { .name = "Routing table", - .size = sizeof(struct rtable), + .size = sizeof(rtable_private), .free = rt_free, .dump = rt_res_dump, .lookup = NULL, @@ -2068,9 +2135,8 @@ rt_setup(pool *pp, struct rtable_config *cf) ASSERT_DIE(ns - 1 == bsnprintf(nb, ns, "Routing table %s", cf->name)); pool *p = rp_new(pp, nb); - mb_move(nb, p); - rtable *t = ralloc(p, &rt_class); + rtable_private *t = ralloc(p, &rt_class); t->rp = p; t->rte_slab = sl_new(p, sizeof(struct rte_storage)); @@ -2090,6 +2156,8 @@ rt_setup(pool *pp, struct rtable_config *cf) init_list(&t->pending_exports); init_list(&t->subscribers); + t->loop = birdloop_new(p, DOMAIN_ORDER(rtable), nb); + t->ec_event = ev_new_init(p, rt_export_cleanup, t); t->prune_event = ev_new_init(p, rt_prune_table, t); t->hcu_event = ev_new_init(p, rt_update_hostcache, t); @@ -2106,7 +2174,8 @@ rt_setup(pool *pp, struct rtable_config *cf) t->nhu_lp = lp_new_default(p); - return t; + mb_move(nb, p); + return (rtable *) t; } /** @@ -2141,7 +2210,9 @@ rt_init(void) static void rt_prune_table(void *data) { - rtable *tab = data; + rtable_private *tab = data; + ASSERT_DIE(birdloop_inside(tab->loop)); + struct fib_iterator *fit = &tab->prune_fit; int limit = 512; @@ -2156,6 +2227,8 @@ rt_prune_table(void *data) if (tab->prune_state == 0) return; + rt_lock_table(tab); + if (tab->prune_state == 1) { /* Mark channels to flush */ @@ -2189,11 +2262,12 @@ again: if (limit <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->prune_event); + ev_send_loop(tab->loop, tab->prune_event); + rt_unlock_table(tab); return; } - rte_recalculate(e->rte.sender, n, NULL, e->rte.src); + rte_recalculate(tab, e->rte.sender, n, NULL, e->rte.src); limit--; goto rescan; @@ -2217,7 +2291,8 @@ again: tab->gc_time = current_time(); /* state change 2->0, 3->1 */ - tab->prune_state &= 1; + if (tab->prune_state &= 1) + ev_send_loop(tab->loop, tab->prune_event); uint flushed_channels = 0; @@ -2240,12 +2315,15 @@ again: /* In some cases, we may want to directly proceed to export cleanup */ if (EMPTY_LIST(tab->exports) && flushed_channels) rt_export_cleanup(tab); + + rt_unlock_table(tab); } static void rt_export_cleanup(void *data) { - rtable *tab = data; + rtable_private *tab = data; + ASSERT_DIE(birdloop_inside(tab->loop)); u64 min_seq = ~((u64) 0); struct rt_pending_export *last_export_to_free = NULL; @@ -2394,7 +2472,7 @@ done:; if (!first_export || (first_export->seq >= ih->flush_seq)) { ih->import_state = TIS_CLEARED; - ih->stopped(ih->req); + ev_send(ih->stopped->list, ih->stopped); rem_node(&ih->n); mb_free(ih); rt_unlock_table(tab); @@ -2535,7 +2613,7 @@ no_nexthop: } static inline struct rte_storage * -rt_next_hop_update_rte(rtable *tab, net *n, rte *old) +rt_next_hop_update_rte(rtable_private *tab, net *n, rte *old) { rta *a = alloca(RTA_MAX_SIZE); memcpy(a, old->attrs, rta_size(old->attrs)); @@ -2553,7 +2631,7 @@ rt_next_hop_update_rte(rtable *tab, net *n, rte *old) } static inline int -rt_next_hop_update_net(rtable *tab, net *n) +rt_next_hop_update_net(rtable_private *tab, net *n) { struct rte_storage *new; int count = 0; @@ -2638,17 +2716,21 @@ rt_next_hop_update_net(rtable *tab, net *n) static void rt_next_hop_update(void *data) { - rtable *tab = data; + rtable_private *tab = data; + 
ASSERT_DIE(birdloop_inside(tab->loop)); + struct fib_iterator *fit = &tab->nhu_fit; int max_feed = 32; - if (tab->nhu_state == NHU_CLEAN) + if (atomic_load_explicit(&tab->nhu_state, memory_order_acquire) == NHU_CLEAN) return; - if (tab->nhu_state == NHU_SCHEDULED) + rt_lock_table(tab); + + if (atomic_load_explicit(&tab->nhu_state, memory_order_acquire) == NHU_SCHEDULED) { FIB_ITERATE_INIT(fit, &tab->fib); - tab->nhu_state = NHU_RUNNING; + ASSERT_DIE(atomic_exchange_explicit(&tab->nhu_state, NHU_RUNNING, memory_order_acq_rel) == NHU_SCHEDULED); } FIB_ITERATE_START(&tab->fib, fit, net, n) @@ -2656,7 +2738,8 @@ rt_next_hop_update(void *data) if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->nhu_event); + ev_send_loop(tab->loop, tab->nhu_event); + rt_unlock_table(tab); return; } max_feed -= rt_next_hop_update_net(tab, n); @@ -2667,10 +2750,10 @@ rt_next_hop_update(void *data) * NHU_DIRTY -> NHU_SCHEDULED * NHU_RUNNING -> NHU_CLEAN */ - tab->nhu_state &= 1; + if (atomic_fetch_and_explicit(&tab->nhu_state, NHU_SCHEDULED, memory_order_acq_rel) != NHU_RUNNING) + ev_send_loop(tab->loop, tab->nhu_event); - if (tab->nhu_state != NHU_CLEAN) - ev_schedule(tab->nhu_event); + rt_unlock_table(tab); } @@ -2713,11 +2796,22 @@ rt_new_table(struct symbol *s, uint addr_type) * configuration. */ void -rt_lock_table(rtable *r) +rt_lock_table(rtable_private *r) { r->use_count++; } +static void +rt_loop_stopped(void *data) +{ + rtable_private *r = data; + birdloop_free(r->loop); + r->loop = NULL; + r->prune_event->list = r->ec_event->list = NULL; + r->nhu_event->list = r->hcu_event->list = NULL; + ev_send(r->delete_event->list, r->delete_event); +} + /** * rt_unlock_table - unlock a routing table * @r: routing table to be unlocked @@ -2727,14 +2821,14 @@ rt_lock_table(rtable *r) * for deletion by configuration changes. 
*/ void -rt_unlock_table(rtable *r) +rt_unlock_table(rtable_private *r) { - if (!--r->use_count && r->delete_event) + if (!--r->use_count && r->delete_event && + !r->prune_state && !atomic_load_explicit(&r->nhu_state, memory_order_acquire)) /* Delete the routing table by freeing its pool */ - ev_schedule(r->delete_event); + birdloop_stop_self(r->loop, rt_loop_stopped, r); } - static struct rtable_config * rt_find_table_config(struct config *cf, char *name) { @@ -2745,7 +2839,9 @@ rt_find_table_config(struct config *cf, char *name) static void rt_done(void *data) { - rtable *t = data; + rtable_private *t = data; + ASSERT_DIE(t->loop == NULL); + struct rtable_config *tc = t->config; struct config *c = tc->config; @@ -2755,6 +2851,7 @@ rt_done(void *data) if (t->hostcache) rt_free_hostcache(t); + rfree(t->delete_event); rfree(t->rp); config_del_obstacle(c); @@ -2782,14 +2879,15 @@ rt_commit(struct config *new, struct config *old) { WALK_LIST(o, old->tables) { - rtable *ot = o->table; + RT_LOCK(o->table); + rtable_private *ot = RT_PRIV(o->table); if (!ot->delete_event) { r = rt_find_table_config(new, o->name); if (r && (r->addr_type == o->addr_type) && !new->shutdown) { DBG("\t%s: same\n", o->name); - r->table = ot; + r->table = (rtable *) ot; ot->name = r->name; ot->config = r; if (o->sorted != r->sorted) @@ -2799,11 +2897,13 @@ rt_commit(struct config *new, struct config *old) { DBG("\t%s: deleted\n", o->name); rt_lock_table(ot); - ot->delete_event = ev_new_init(ot->rp, rt_done, ot); + ot->delete_event = ev_new_init(&root_pool, rt_done, ot); + ot->delete_event->list = &global_event_list; config_add_obstacle(old); rt_unlock_table(ot); } } + RT_UNLOCK(o->table); } } @@ -2834,46 +2934,98 @@ rt_feed_channel(void *data) struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; + RT_LOCK(c->table); + rtable_private *tab = RT_PRIV(c->table); + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - FIB_ITERATE_START(&c->table->fib, fit, net, n) +redo: + FIB_ITERATE_START(&tab->fib, fit, net, n) { if (max_feed <= 0) { FIB_ITERATE_PUT(fit); rt_send_export_event(c); + + RT_UNLOCK(c->table); return; } if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + { + RT_UNLOCK(c->table); return; + } - if (c->req->export_bulk) + if (!n->routes || !rte_is_valid(&n->routes->rte)) + ; /* if no route, do nothing */ + else if (c->req->export_bulk) { uint count = rte_feed_count(n); if (count) { rte **feed = alloca(count * sizeof(rte *)); rte_feed_obtain(n, feed, count); + + struct rt_pending_export *rpe_last, *rpe_first = n->first; + for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = rpe_next(rpe, NULL)) + rpe_last = rpe; + + FIB_ITERATE_PUT_NEXT(fit, &tab->fib); + RT_UNLOCK(c->table); + c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); + + RT_LOCK(c->table); + + for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = rpe_next(rpe, NULL)) + { + rpe_mark_seen(c, rpe); + if (rpe == rpe_last) + break; + ASSERT_DIE(rpe->seq < rpe_last->seq); + } + max_feed -= count; + + goto redo; } } - else if (n->routes && rte_is_valid(&n->routes->rte)) + else if (c->req->export_one) { struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; + + struct rt_pending_export *rpe_last, *rpe_first = n->first; + for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = rpe_next(rpe, NULL)) + rpe_last = rpe; + + FIB_ITERATE_PUT_NEXT(fit, &tab->fib); + RT_UNLOCK(c->table); + c->req->export_one(c->req, n->n.addr, &rpe); + + RT_LOCK(c->table); + 
for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = rpe_next(rpe, NULL)) + { + rpe_mark_seen(c, rpe); + if (rpe == rpe_last) + break; + ASSERT_DIE(rpe->seq < rpe_last->seq); + } + max_feed--; + goto redo; } - - for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL)) - rpe_mark_seen(c, rpe); + else + bug("Export request must always provide an export method"); } FIB_ITERATE_END; c->event->hook = rt_export_hook; rt_send_export_event(c); + RT_UNLOCK(c->table); + rt_set_export_state(c, TES_READY); } @@ -2981,7 +3133,7 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) } static void -rt_init_hostcache(rtable *tab) +rt_init_hostcache(rtable_private *tab) { struct hostcache *hc = mb_allocz(tab->rp, sizeof(struct hostcache)); init_list(&hc->hostentries); @@ -2997,7 +3149,7 @@ rt_init_hostcache(rtable *tab) } static void -rt_free_hostcache(rtable *tab) +rt_free_hostcache(rtable_private *tab) { struct hostcache *hc = tab->hostcache; @@ -3020,13 +3172,13 @@ rt_free_hostcache(rtable *tab) } static void -rt_notify_hostcache(rtable *tab, net *net) +rt_notify_hostcache(rtable_private *tab, net *net) { if (ev_active(tab->hcu_event)) return; if (trie_match_net(tab->hostcache->trie, net->n.addr)) - ev_schedule(tab->hcu_event); + ev_send_loop(tab->loop, tab->hcu_event); } static int @@ -3059,7 +3211,7 @@ rt_get_igp_metric(rte *rt) } static int -rt_update_hostentry(rtable *tab, struct hostentry *he) +rt_update_hostentry(rtable_private *tab, struct hostentry *he) { rta *old_src = he->src; int direct = 0; @@ -3125,7 +3277,9 @@ done: static void rt_update_hostcache(void *data) { - rtable *tab = data; + rtable_private *tab = data; + ASSERT_DIE(birdloop_inside(tab->loop)); + struct hostcache *hc = tab->hostcache; struct hostentry *he; node *n, *x; @@ -3149,10 +3303,12 @@ rt_update_hostcache(void *data) } struct hostentry * -rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) +rt_get_hostentry(rtable *t, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; + rtable_private *tab = RT_LOCK(t); + if (!tab->hostcache) rt_init_hostcache(tab); @@ -3160,10 +3316,13 @@ rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) struct hostcache *hc = tab->hostcache; for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next) if (ipa_equal(he->addr, a) && (he->tab == dep)) - return he; + goto done; he = hc_new_hostentry(hc, tab->rp, a, ipa_zero(ll) ? a : ll, dep, k); rt_update_hostentry(tab, he); + +done: + RT_UNLOCK(t); return he; } diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 9b9013f9..1080db77 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -2140,7 +2140,7 @@ use_deterministic_med(struct rte_storage *r) } int -bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) +bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best) { rte *key = new ? 
new : old; u32 lpref = key->attrs->pref; diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index dc845550..aac1f45c 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -1800,10 +1800,12 @@ bgp_channel_start(struct channel *C) ip_addr src = p->local_ip; if (c->igp_table_ip4) - rt_lock_table(c->igp_table_ip4); + RT_LOCKED(c->igp_table_ip4, t) + rt_lock_table(t); if (c->igp_table_ip6) - rt_lock_table(c->igp_table_ip6); + RT_LOCKED(c->igp_table_ip6, t) + rt_lock_table(t); c->pool = p->p.pool; // XXXX bgp_init_bucket_table(c); @@ -1884,10 +1886,12 @@ bgp_channel_cleanup(struct channel *C) struct bgp_channel *c = (void *) C; if (c->igp_table_ip4) - rt_unlock_table(c->igp_table_ip4); + RT_LOCKED(c->igp_table_ip4, t) + rt_unlock_table(t); if (c->igp_table_ip6) - rt_unlock_table(c->igp_table_ip6); + RT_LOCKED(c->igp_table_ip6, t) + rt_unlock_table(t); c->index = 0; diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 7cb4df1f..60f93bce 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -586,7 +586,7 @@ void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); -int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); +int bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best); void bgp_rte_modify_stale(struct rt_export_request *, const net_addr *, struct rt_pending_export *, rte **, uint); u32 bgp_rte_igp_metric(struct rte *); void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old); diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index 9d78438d..b40592d2 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -228,7 +228,7 @@ mrt_next_table_(rtable *tab, rtable *tab_ptr, const char *pattern) NODE_VALID(tn); tn = tn->next) { - tab = SKIP_BACK(struct rtable, n, tn); + tab = SKIP_BACK(rtable, n, tn); if (patmatch(pattern, tab->name) && ((tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6))) return tab; @@ -243,13 +243,21 @@ mrt_next_table(struct mrt_table_dump_state *s) rtable *tab = mrt_next_table_(s->table, s->table_ptr, s->table_expr); if (s->table) - rt_unlock_table(s->table); + { + RT_LOCK(s->table); + rt_unlock_table(RT_PRIV(s->table)); + RT_UNLOCK(s->table); + } s->table = tab; s->ipv4 = tab ? 
(tab->addr_type == NET_IP4) : 0; if (s->table) - rt_lock_table(s->table); + { + RT_LOCK(s->table); + rt_lock_table(RT_PRIV(s->table)); + RT_UNLOCK(s->table); + } return s->table; } @@ -573,14 +581,23 @@ mrt_table_dump_init(pool *pp) static void mrt_table_dump_free(struct mrt_table_dump_state *s) { - if (s->table_open) - FIB_ITERATE_UNLINK(&s->fit, &s->table->fib); - if (s->table) - rt_unlock_table(s->table); + { + RT_LOCK(s->table); + + if (s->table_open) + FIB_ITERATE_UNLINK(&s->fit, &RT_PRIV(s->table)->fib); + + rt_unlock_table(RT_PRIV(s->table)); + RT_UNLOCK(s->table); + } if (s->table_ptr) - rt_unlock_table(s->table_ptr); + { + RT_LOCK(s->table_ptr); + rt_unlock_table(RT_PRIV(s->table_ptr)); + RT_UNLOCK(s->table_ptr); + } config_del_obstacle(s->config); @@ -596,8 +613,14 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) s->max = 2048; s->bws = &bws; + rtable_private *tab; + if (s->table_open) + { + RT_LOCK(s->table); + tab = RT_PRIV(s->table); goto step; + } while (mrt_next_table(s)) { @@ -606,15 +629,18 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) mrt_peer_table_dump(s); - FIB_ITERATE_INIT(&s->fit, &s->table->fib); + RT_LOCK(s->table); + tab = RT_PRIV(s->table); + FIB_ITERATE_INIT(&s->fit, &tab->fib); s->table_open = 1; step: - FIB_ITERATE_START(&s->table->fib, &s->fit, net, n) + FIB_ITERATE_START(&tab->fib, &s->fit, net, n) { if (s->max < 0) { FIB_ITERATE_PUT(&s->fit); + RT_UNLOCK(s->table); return 0; } @@ -634,6 +660,7 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) mrt_peer_table_flush(s); } + RT_UNLOCK(s->table); return 1; } @@ -661,7 +688,11 @@ mrt_timer(timer *t) s->always_add_path = cf->always_add_path; if (s->table_ptr) - rt_lock_table(s->table_ptr); + { + RT_LOCK(s->table_ptr); + rt_lock_table(RT_PRIV(s->table_ptr)); + RT_UNLOCK(s->table_ptr); + } p->table_dump = s; ev_schedule(p->event); @@ -734,7 +765,11 @@ mrt_dump_cmd(struct mrt_dump_data *d) s->filename = d->filename; if (s->table_ptr) - rt_lock_table(s->table_ptr); + { + RT_LOCK(s->table_ptr); + rt_lock_table(RT_PRIV(s->table_ptr)); + RT_UNLOCK(s->table_ptr); + } this_cli->cont = mrt_dump_cont; this_cli->cleanup = mrt_dump_cleanup; diff --git a/proto/mrt/mrt.h b/proto/mrt/mrt.h index 4ff94c12..04865089 100644 --- a/proto/mrt/mrt.h +++ b/proto/mrt/mrt.h @@ -40,7 +40,7 @@ struct mrt_proto { struct mrt_dump_data { const char *table_expr; - struct rtable *table_ptr; + rtable *table_ptr; const struct filter *filter; const char *filename; }; @@ -60,7 +60,7 @@ struct mrt_table_dump_state { /* Configuration information */ const char *table_expr; /* Wildcard for table name (or NULL) */ - struct rtable *table_ptr; /* Explicit table (or NULL) */ + rtable *table_ptr; /* Explicit table (or NULL) */ const struct filter *filter; /* Optional filter */ const char *filename; /* Filename pattern */ int always_add_path; /* Always use *_ADDPATH message subtypes */ @@ -73,7 +73,7 @@ struct mrt_table_dump_state { HASH(struct mrt_peer_entry) peer_hash; /* Hash for peers to find the index */ - struct rtable *table; /* Processed table, NULL initially */ + rtable *table; /* Processed table, NULL initially */ struct fib_iterator fit; /* Iterator in processed table */ int table_open; /* Whether iterator is linked */ diff --git a/proto/perf/perf.c b/proto/perf/perf.c index 8b2cb69f..aa688d88 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -198,7 +198,9 @@ perf_loop(void *data) p->exp++; } - rt_schedule_prune(P->main_channel->table); + RT_LOCK(P->main_channel->table); + 
rt_schedule_prune(RT_PRIV(P->main_channel->table)); + RT_UNLOCK(P->main_channel->table); ev_schedule(p->loop); } diff --git a/proto/radv/radv.c b/proto/radv/radv.c index fa228c69..d572c1b7 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -555,7 +555,10 @@ radv_check_active(struct radv_proto *p) return 1; struct channel *c = p->p.main_channel; - return rt_examine(c->table, &cf->trigger, c, c->out_filter); + RT_LOCK(c->table); + int active = rt_examine(RT_PRIV(c->table), &cf->trigger, c, c->out_filter); + RT_UNLOCK(c->table); + return active; } static void diff --git a/proto/static/static.c b/proto/static/static.c index 45791e8e..bd7f3f5b 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -491,10 +491,12 @@ static_start(struct proto *P) static_lp = lp_new(&root_pool, LP_GOOD_SIZE(1024)); if (p->igp_table_ip4) - rt_lock_table(p->igp_table_ip4); + RT_LOCKED(p->igp_table_ip4, t) + rt_lock_table(t); if (p->igp_table_ip6) - rt_lock_table(p->igp_table_ip6); + RT_LOCKED(p->igp_table_ip6, t) + rt_lock_table(t); p->event = ev_new_init(p->p.pool, static_announce_marked, p); @@ -521,10 +523,12 @@ static_shutdown(struct proto *P) static_reset_rte(p, r); if (p->igp_table_ip4) - rt_unlock_table(p->igp_table_ip4); + RT_LOCKED(p->igp_table_ip4, t) + rt_unlock_table(t); if (p->igp_table_ip6) - rt_unlock_table(p->igp_table_ip6); + RT_LOCKED(p->igp_table_ip6, t) + rt_unlock_table(t); return PS_DOWN; } diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 5431bebe..98c56391 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -427,6 +427,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) #endif /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */ + RT_LOCK(p->p.main_channel->table); /* Deleting all routes if flush is requested */ if (p->flush_routes) goto delete; @@ -435,7 +436,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) if (!p->ready) goto ignore; - net *net = net_find(p->p.main_channel->table, e->net); + net *net = net_find(RT_PRIV(p->p.main_channel->table), e->net); if (!net || !krt_is_installed(p, net)) goto delete; @@ -481,6 +482,7 @@ delete: goto done; done: + RT_UNLOCK(p->p.main_channel->table); lp_flush(krt_filter_lp); } @@ -498,7 +500,8 @@ krt_init_scan(struct krt_proto *p) static void krt_prune(struct krt_proto *p) { - struct rtable *t = p->p.main_channel->table; + RT_LOCK(p->p.main_channel->table); + rtable_private *t = RT_PRIV(p->p.main_channel->table); KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name); FIB_WALK(&t->fib, net, n) @@ -518,6 +521,8 @@ krt_prune(struct krt_proto *p) } FIB_WALK_END; + RT_UNLOCK(p->p.main_channel->table); + #ifdef KRT_ALLOW_LEARN if (KRT_CF->learn) channel_refresh_end(p->p.main_channel); |