Diffstat (limited to 'nest/rt-table.c')
-rw-r--r-- | nest/rt-table.c | 2671
1 file changed, 1862 insertions(+), 809 deletions(-)
diff --git a/nest/rt-table.c b/nest/rt-table.c
index 29690414..36d69d92 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -43,10 +43,10 @@
  * all prefixes that may influence resolving of tracked next hops.
  *
  * When a best route changes in the src table, the hostcache is notified using
- * rt_notify_hostcache(), which immediately checks using the trie whether the
+ * an auxiliary export request, which checks using the trie whether the
  * change is relevant and if it is, then it schedules asynchronous hostcache
  * recomputation. The recomputation is done by rt_update_hostcache() (called
- * from rt_event() of src table), it walks through all hostentries and resolves
+ * as an event of src table), it walks through all hostentries and resolves
  * them (by rt_update_hostentry()). It also updates the trie. If a change in
  * hostentry resolution was found, then it schedules asynchronous nexthop
  * recomputation of associated dst table. That is done by rt_next_hop_update()
@@ -60,15 +60,14 @@
  * routes depends of resolving their network prefixes in IP routing tables. This
  * is similar to the recursive next hop mechanism, but simpler as there are no
  * intermediate hostcache and hostentries (because flows are less likely to
- * share common net prefix than routes sharing a common next hop). In src table,
- * there is a list of dst tables (list flowspec_links), this list is updated by
- * flowpsec channels (by rt_flowspec_link() and rt_flowspec_unlink() during
- * channel start/stop). Each dst table has its own trie of prefixes that may
- * influence validation of flowspec routes in it (flowspec_trie).
+ * share common net prefix than routes sharing a common next hop). Every dst
+ * table has its own export request in every src table. Each dst table has its
+ * own trie of prefixes that may influence validation of flowspec routes in it
+ * (flowspec_trie).
  *
- * When a best route changes in the src table, rt_flowspec_notify() immediately
- * checks all dst tables from the list using their tries to see whether the
- * change is relevant for them. If it is, then an asynchronous re-validation of
+ * When a best route changes in the src table, the notification mechanism is
+ * invoked by the export request which checks its dst table's trie to see
+ * whether the change is relevant, and if so, an asynchronous re-validation of
  * flowspec routes in the dst table is scheduled. That is also done by function
  * rt_next_hop_update(), like nexthop recomputation above. It iterates over all
  * flowspec routes and re-validates them. It also recalculates the trie.
@@ -83,9 +82,8 @@
  * will be re-validated later in this round anyway.
  *
  * The third mechanism is used for RPKI re-validation of IP routes and it is the
- * simplest. It is just a list of subscribers in src table, who are notified
- * when any change happened, but only after a settle time. Also, in RPKI case
- * the dst is not a table, but a channel, who refeeds routes through a filter.
+ * simplest. It is also an auxiliary export request belonging to the
+ * appropriate channel, triggering its reload/refeed timer after a settle time.
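The "settle time" behaviour referenced in the comment above can be pictured with a tiny standalone C model (all names here are invented, nothing is BIRD code): every kick from a route change postpones the notification, but a hard maximum since the first kick guarantees a flapping source still settles.

#include <stdio.h>

#define SETTLE_MIN  2   /* quiet period required before firing */
#define SETTLE_MAX 10   /* upper bound since the first kick */

struct settle { int first_kick, last_kick, active; };

static void settle_kick(struct settle *s, int now)
{
  if (!s->active) { s->active = 1; s->first_kick = now; }
  s->last_kick = now;
}

static int settle_fires_at(const struct settle *s)
{
  int by_min = s->last_kick + SETTLE_MIN;
  int by_max = s->first_kick + SETTLE_MAX;
  return (by_min < by_max) ? by_min : by_max;
}

int main(void)
{
  struct settle s = { 0 };
  for (int t = 0; t < 10; t++)   /* constant kicking, one per tick */
    settle_kick(&s, t);

  /* capped by SETTLE_MAX, so this prints 10 even though kicks never stop */
  printf("fires at t=%d\n", settle_fires_at(&s));
  return 0;
}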
 */

#undef LOCAL_DEBUG

@@ -105,32 +103,53 @@
 #include "lib/string.h"
 #include "lib/alloca.h"
 #include "lib/flowspec.h"
+#include "lib/idm.h"

 #ifdef CONFIG_BGP
 #include "proto/bgp/bgp.h"
 #endif

-pool *rt_table_pool;
+#include <stdatomic.h>

-static linpool *rte_update_pool;
+pool *rt_table_pool;

 list routing_tables;
 list deleted_routing_tables;

-static void rt_free_hostcache(rtable *tab);
-static void rt_notify_hostcache(rtable *tab, net *net);
-static void rt_update_hostcache(rtable *tab);
-static void rt_next_hop_update(rtable *tab);
+struct rt_cork rt_cork;
+
+/* Data structures for export journal */
+#define RT_PENDING_EXPORT_ITEMS (page_size - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export)
+
+struct rt_export_block {
+  node n;
+  _Atomic u32 end;
+  _Atomic _Bool not_last;
+  struct rt_pending_export export[];
+};
+
+static void rt_free_hostcache(struct rtable_private *tab);
+static void rt_update_hostcache(void *tab);
+static void rt_next_hop_update(struct rtable_private *tab);
+static void rt_nhu_uncork(void *_tab);
 static inline void rt_next_hop_resolve_rte(rte *r);
 static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c);
-static inline void rt_prune_table(rtable *tab);
-static inline void rt_schedule_notify(rtable *tab);
-static void rt_flowspec_notify(rtable *tab, net *net);
+static inline void rt_prune_table(struct rtable_private *tab);
+static void rt_kick_prune_timer(struct rtable_private *tab);
 static void rt_feed_by_fib(void *);
 static void rt_feed_by_trie(void *);
 static void rt_feed_equal(void *);
 static void rt_feed_for(void *);
-static uint rt_feed_net(struct rt_export_hook *c, net *n);
+static void rt_check_cork_low(struct rtable_private *tab);
+static void rt_check_cork_high(struct rtable_private *tab);
+static void rt_cork_release_hook(void *);
+static void rt_shutdown(void *);
+static void rt_delete(void *);
+
+static void rt_export_used(struct rt_table_exporter *, const char *, const char *);
+static void rt_export_cleanup(struct rtable_private *tab);
+
+static int rte_same(rte *x, rte *y);

 const char *rt_import_state_name_array[TIS_MAX] = {
   [TIS_DOWN] = "DOWN",
@@ -143,6 +162,7 @@ const char *rt_import_state_name_array[TIS_MAX] = {

 const char *rt_export_state_name_array[TES_MAX] = {
   [TES_DOWN] = "DOWN",
+  [TES_HUNGRY] = "HUNGRY",
   [TES_FEEDING] = "FEEDING",
   [TES_READY] = "READY",
   [TES_STOP] = "STOP"
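A standalone sketch of the RT_PENDING_EXPORT_ITEMS arithmetic introduced above: the export journal allocates whole pages and packs as many fixed-size pending-export items after the block header as fit. The struct layouts here are simplified stand-ins, not BIRD's real types.

#include <stdio.h>
#include <stdatomic.h>

#define PAGE_SIZE 4096u

struct pending_item { void *new, *old; unsigned long seq; };

struct export_block {
  void *list_node[2];              /* stand-in for the embedded list node */
  _Atomic unsigned end;            /* number of items already filled */
  _Atomic _Bool not_last;          /* set once a newer block exists */
  struct pending_item item[];      /* flexible array fills the rest of the page */
};

#define ITEMS_PER_BLOCK \
  ((PAGE_SIZE - sizeof(struct export_block)) / sizeof(struct pending_item))

int main(void)
{
  printf("header %zu bytes, item %zu bytes -> %zu items per 4K page\n",
         sizeof(struct export_block), sizeof(struct pending_item),
         (size_t) ITEMS_PER_BLOCK);
  return 0;
}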
@@ -164,13 +184,22 @@ const char *rt_export_state_name(u8 state)
   return rt_export_state_name_array[state];
 }

-static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old);
-static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep);
+static struct hostentry *rt_get_hostentry(struct rtable_private *tab, ip_addr a, ip_addr ll, rtable *dep);
+
+static inline rtable *rt_priv_to_pub(struct rtable_private *tab) { return RT_PUB(tab); }
+static inline rtable *rt_pub_to_pub(rtable *tab) { return tab; }
+#define RT_ANY_TO_PUB(tab) _Generic((tab),rtable*:rt_pub_to_pub,struct rtable_private*:rt_priv_to_pub)((tab))
+
+#define rt_trace(tab, level, fmt, args...) do {\
+  rtable *t = RT_ANY_TO_PUB((tab)); \
+  if (t->config->debug & (level)) \
+    log(L_TRACE "%s: " fmt, t->name, ##args); \
+} while (0)

 static void
 net_init_with_trie(struct fib *f, void *N)
 {
-  rtable *tab = SKIP_BACK(rtable, fib, f);
+  struct rtable_private *tab = SKIP_BACK(struct rtable_private, fib, f);
   net *n = N;

   if (tab->trie)
@@ -181,7 +210,7 @@ net_init_with_trie(struct fib *f, void *N)
 }

 static inline net *
-net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0)
+net_route_ip4_trie(struct rtable_private *t, const net_addr_ip4 *n0)
 {
   TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n)
   {
@@ -195,7 +224,7 @@ net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0)
 }

 static inline net *
-net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0)
+net_route_vpn4_trie(struct rtable_private *t, const net_addr_vpn4 *n0)
 {
   TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px)
   {
@@ -211,7 +240,7 @@ net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0)
 }

 static inline net *
-net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0)
+net_route_ip6_trie(struct rtable_private *t, const net_addr_ip6 *n0)
 {
   TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n)
   {
@@ -225,7 +254,7 @@ net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0)
 }

 static inline net *
-net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0)
+net_route_vpn6_trie(struct rtable_private *t, const net_addr_vpn6 *n0)
 {
   TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
   {
@@ -241,7 +270,7 @@ net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0)
 }

 static inline void *
-net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0)
+net_route_ip6_sadr_trie(struct rtable_private *t, const net_addr_ip6_sadr *n0)
 {
   TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
   {
@@ -274,7 +303,7 @@ net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0)
 }

 static inline net *
-net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0)
+net_route_ip4_fib(struct rtable_private *t, const net_addr_ip4 *n0)
 {
   net_addr_ip4 n;
   net_copy_ip4(&n, n0);
@@ -290,7 +319,7 @@ net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0)
 }

 static inline net *
-net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0)
+net_route_vpn4_fib(struct rtable_private *t, const net_addr_vpn4 *n0)
 {
   net_addr_vpn4 n;
   net_copy_vpn4(&n, n0);
@@ -306,7 +335,7 @@ net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0)
 }

 static inline net *
-net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0)
+net_route_ip6_fib(struct rtable_private *t, const net_addr_ip6 *n0)
 {
   net_addr_ip6 n;
   net_copy_ip6(&n, n0);
@@ -322,7 +351,7 @@ net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0)
 }

 static inline net *
-net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0)
+net_route_vpn6_fib(struct rtable_private *t, const net_addr_vpn6 *n0)
 {
   net_addr_vpn6 n;
   net_copy_vpn6(&n, n0);
@@ -338,7 +367,7 @@ net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0)
 }

 static inline void *
-net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0)
+net_route_ip6_sadr_fib(struct rtable_private *t, const net_addr_ip6_sadr *n0)
 {
   net_addr_ip6_sadr n;
   net_copy_ip6_sadr(&n, n0);
@@ -378,7 +407,7 @@ net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0)
 }

 net *
-net_route(rtable *tab, const net_addr *n)
+net_route(struct rtable_private *tab, const net_addr *n)
 {
   ASSERT(tab->addr_type == n->type);
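The rt_trace() macro added above follows a common level-gated logging pattern. Here is a self-contained C model of the same idea; the config struct and debug bits are invented for illustration:

#include <stdio.h>

#define D_EVENTS 0x01
#define D_STATES 0x02

struct cfg { unsigned debug; const char *name; };

#define my_trace(c, level, fmt, ...) do {                        \
  if ((c)->debug & (level))                                      \
    fprintf(stderr, "%s: " fmt "\n", (c)->name, ##__VA_ARGS__);  \
} while (0)

int main(void)
{
  struct cfg c = { .debug = D_EVENTS, .name = "master4" };
  my_trace(&c, D_EVENTS, "Scheduling NHU");          /* printed */
  my_trace(&c, D_STATES, "state change to %d", 2);   /* masked out */
  return 0;
}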
@@ -421,7 +450,7 @@ net_route(rtable *tab, const net_addr *n)
 }


 static int
-net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn)
+net_roa_check_ip4_trie(struct rtable_private *tab, const net_addr_ip4 *px, u32 asn)
 {
   int anything = 0;

@@ -449,7 +478,7 @@ net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn)
 }

 static int
-net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn)
+net_roa_check_ip4_fib(struct rtable_private *tab, const net_addr_ip4 *px, u32 asn)
 {
   struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
   struct fib_node *fn;
@@ -481,7 +510,7 @@ net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn)
 }

 static int
-net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn)
+net_roa_check_ip6_trie(struct rtable_private *tab, const net_addr_ip6 *px, u32 asn)
 {
   int anything = 0;

@@ -509,7 +538,7 @@ net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn)
 }

 static int
-net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn)
+net_roa_check_ip6_fib(struct rtable_private *tab, const net_addr_ip6 *px, u32 asn)
 {
   struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
   struct fib_node *fn;
@@ -556,24 +585,30 @@ net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn)
  * must have type NET_IP4 or NET_IP6, respectively.
  */
 int
-net_roa_check(rtable *tab, const net_addr *n, u32 asn)
+net_roa_check(rtable *tp, const net_addr *n, u32 asn)
 {
-  if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
-  {
-    if (tab->trie)
-      return net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn);
-    else
-      return net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn);
-  }
-  else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
+  int out = ROA_UNKNOWN;
+
+  RT_LOCKED(tp, tab)
   {
-    if (tab->trie)
-      return net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn);
+    if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
+    {
+      if (tab->trie)
+        out = net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn);
+      else
+        out = net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn);
+    }
+    else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
+    {
+      if (tab->trie)
+        out = net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn);
+      else
+        out = net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn);
+    }
     else
-      return net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn);
+      out = ROA_UNKNOWN; /* Should not happen */
   }
-  else
-    return ROA_UNKNOWN; /* Should not happen */
+
+  return out;
 }
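net_roa_check() now wraps all table access in RT_LOCKED() and returns only after leaving the locked scope. A minimal standalone model of that lock-scope pattern, assuming a plain pthread mutex instead of BIRD's table locking (the macro trick here is a simplification; do not break or return from inside the block in this toy):

#include <pthread.h>
#include <stdio.h>

struct table { pthread_mutex_t mtx; int value; };

#define TAB_LOCKED(t, tt) \
  for (struct table *tt = (pthread_mutex_lock(&(t)->mtx), (t)); \
       tt; pthread_mutex_unlock(&(t)->mtx), tt = NULL)

static int table_read(struct table *t)
{
  int out = -1;               /* default when nothing matches */

  TAB_LOCKED(t, tab)
    out = tab->value;         /* all table access happens under the lock */

  return out;                 /* lock already released here */
}

int main(void)
{
  struct table t = { PTHREAD_MUTEX_INITIALIZER, 42 };
  printf("%d\n", table_read(&t));
  return 0;
}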
 /**
@@ -597,7 +632,7 @@ rte_find(net *net, struct rte_src *src)

 struct rte_storage *
-rte_store(const rte *r, net *net, rtable *tab)
+rte_store(const rte *r, net *net, struct rtable_private *tab)
 {
   struct rte_storage *e = sl_alloc(tab->rte_slab);

@@ -647,16 +682,16 @@ rte_better(rte *new, rte *old)
     return 1;
   if (np < op)
     return 0;
-  if (new->src->proto->proto != old->src->proto->proto)
+  if (new->src->owner->class != old->src->owner->class)
   {
    /*
     *  If the user has configured protocol preferences, so that two different protocols
     *  have the same preference, try to break the tie by comparing addresses. Not too
     *  useful, but keeps the ordering of routes unambiguous.
    */
-    return new->src->proto->proto > old->src->proto->proto;
+    return new->src->owner->class > old->src->owner->class;
  }
-  if (better = new->src->proto->rte_better)
+  if (better = new->src->owner->class->rte_better)
    return better(new, old);
  return 0;
 }
@@ -672,10 +707,10 @@ rte_mergable(rte *pri, rte *sec)
   if (rt_get_preference(pri) != rt_get_preference(sec))
     return 0;

-  if (pri->src->proto->proto != sec->src->proto->proto)
+  if (pri->src->owner->class != sec->src->owner->class)
     return 0;

-  if (mergable = pri->src->proto->rte_mergable)
+  if (mergable = pri->src->owner->class->rte_mergable)
     return mergable(pri, sec);

   return 0;
@@ -684,8 +719,9 @@ rte_mergable(rte *pri, rte *sec)
 static void
 rte_trace(const char *name, const rte *e, int dir, const char *msg)
 {
-  log(L_TRACE "%s %c %s %N %uL %uG %s",
-      name, dir, msg, e->net, e->src->private_id, e->src->global_id,
+  log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s",
+      name, dir, msg, e->net,
+      e->src->private_id, e->src->global_id, e->stale_cycle, e->id,
      rta_dest_name(rte_dest(e)));
 }

@@ -725,8 +761,8 @@ rte_feed_count(net *n)
 {
   uint count = 0;
   for (struct rte_storage *e = n->routes; e; e = e->next)
-    if (rte_is_valid(RTE_OR_NULL(e)))
-      count++;
+    count++;
+
   return count;
 }

@@ -735,11 +771,11 @@ rte_feed_obtain(net *n, struct rte **feed, uint count)
 {
   uint i = 0;
   for (struct rte_storage *e = n->routes; e; e = e->next)
-    if (rte_is_valid(RTE_OR_NULL(e)))
     {
       ASSERT_DIE(i < count);
       feed[i++] = &e->rte;
     }
+
   ASSERT_DIE(i == count);
 }

@@ -847,6 +883,16 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old)
 static void
 rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old)
 {
+  if (new && old && rte_same(new, old))
+  {
+    if ((new->id != old->id) && bmap_test(&c->export_map, old->id))
+    {
+      bmap_set(&c->export_map, new->id);
+      bmap_clear(&c->export_map, old->id);
+    }
+    return;
+  }
+
   if (new)
     new = export_filter(c, new, 0);
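The new fast path at the top of rt_notify_basic() only moves a bit between route ids when the route contents are unchanged, sending nothing. A toy standalone version with a 64-bit word standing in for BIRD's bmap:

#include <stdint.h>
#include <stdio.h>

static uint64_t export_map;   /* bit n set = route id n was exported */

#define BIT_SET(m, i)   ((m) |=  (UINT64_C(1) << (i)))
#define BIT_CLEAR(m, i) ((m) &= ~(UINT64_C(1) << (i)))
#define BIT_TEST(m, i)  (((m) >> (i)) & 1)

static void same_route_new_id(unsigned new_id, unsigned old_id)
{
  if ((new_id != old_id) && BIT_TEST(export_map, old_id))
  {
    BIT_SET(export_map, new_id);     /* remember the new identity... */
    BIT_CLEAR(export_map, old_id);   /* ...and forget the old one */
  }
  /* no notification: the route itself did not change */
}

int main(void)
{
  BIT_SET(export_map, 3);
  same_route_new_id(7, 3);
  printf("old=%d new=%d\n", (int) BIT_TEST(export_map, 3),
         (int) BIT_TEST(export_map, 7));   /* prints old=0 new=1 */
  return 0;
}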
@@ -859,8 +905,18 @@ rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old)
   do_rt_notify(c, net, new, old);
 }

+static void
+channel_rpe_mark_seen(struct rt_export_request *req, struct rt_pending_export *rpe)
+{
+  struct channel *c = SKIP_BACK(struct channel, out_req, req);
+
+  rpe_mark_seen(req->hook, rpe);
+  if (rpe->old)
+    bmap_clear(&c->export_reject_map, rpe->old->rte.id);
+}
+
 void
-rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe,
+rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first,
                    struct rte **feed, uint count)
 {
   struct channel *c = SKIP_BACK(struct channel, out_req, req);
@@ -902,38 +958,26 @@ rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_p
     }
   }

+done:
   /* Check obsolete routes for previously exported */
-  if (!old_best)
-    if (rpe && rpe->old && bmap_test(&c->export_map, rpe->old->rte.id))
-      old_best = &rpe->old->rte;
-
-/*
-  for (; rpe; rpe = atomic_load_explicit(&rpe->next, memory_order_relaxed))
+  RPE_WALK(first, rpe, NULL)
+  {
+    channel_rpe_mark_seen(req, rpe);
+    if (rpe->old)
     {
-      if (rpe->old && bmap_test(&hook->accept_map, rpe->old->id))
+      if (bmap_test(&c->export_map, rpe->old->rte.id))
       {
-        old_best = &rpe->old.rte;
-        break;
+        ASSERT_DIE(old_best == NULL);
+        old_best = &rpe->old->rte;
       }
-
-      if (rpe == rpe_last)
-        break;
     }
- */
+  }

   /* Nothing to export */
-  if (!new_best && !old_best)
-  {
+  if (new_best || old_best)
+    do_rt_notify(c, n, new_best, old_best);
+  else
     DBG("rt_notify_accepted: nothing to export\n");
-    goto done;
-  }
-
-  do_rt_notify(c, n, new_best, old_best);
-
-done:
-  /* Drop the old stored rejection if applicable.
-   * new->id == old->id happens when updating hostentries. */
-  if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id)))
-    bmap_clear(&c->export_reject_map, rpe->old->rte.id);
 }

 rte *
@@ -999,7 +1043,7 @@ rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool
 }

 void
-rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe,
+rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first,
                  struct rte **feed, uint count)
 {
   struct channel *c = SKIP_BACK(struct channel, out_req, req);
@@ -1025,63 +1069,70 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen
   }

   /* Check obsolete routes for previously exported */
-  if (!old_best)
-    if (rpe && rpe->old && bmap_test(&c->export_map, rpe->old->rte.id))
-      old_best = &rpe->old->rte;
-
-/*
-  for (; rpe; rpe = atomic_load_explicit(&rpe->next, memory_order_relaxed))
+  RPE_WALK(first, rpe, NULL)
+  {
+    channel_rpe_mark_seen(req, rpe);
+    if (rpe->old)
     {
-      if (rpe->old && bmap_test(&hook->accept_map, rpe->old->id))
+      if (bmap_test(&c->export_map, rpe->old->rte.id))
       {
-        old_best = &rpe->old.rte;
-        break;
+        ASSERT_DIE(old_best == NULL);
+        old_best = &rpe->old->rte;
       }
-
-      if (rpe == rpe_last)
-        break;
     }
- */
+  }

   /* Prepare new merged route */
-  rte *new_merged = count ? rt_export_merged(c, feed, count, rte_update_pool, 0) : NULL;
+  rte *new_merged = count ? rt_export_merged(c, feed, count, tmp_linpool, 0) : NULL;

   if (new_merged || old_best)
     do_rt_notify(c, n, new_merged, old_best);
-
-  /* Drop the old stored rejection if applicable.
-   * new->id == old->id happens when updating hostentries. */
-  if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id)))
-    bmap_clear(&c->export_reject_map, rpe->old->rte.id);
 }

 void
-rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe)
+rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first)
 {
   struct channel *c = SKIP_BACK(struct channel, out_req, req);
-  rte n0;
+  rte *o = RTE_VALID_OR_NULL(first->old_best);
+  struct rte_storage *new_best = first->new_best;

-  if (rpe->new_best != rpe->old_best)
-    rt_notify_basic(c, net, RTE_COPY(rpe->new_best, &n0), RTE_OR_NULL(rpe->old_best));
+  RPE_WALK(first, rpe, NULL)
+  {
+    channel_rpe_mark_seen(req, rpe);
+    new_best = rpe->new_best;
+  }

-  /* Drop the old stored rejection if applicable.
-   * new->id == old->id happens when updating hostentries. */
-  if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id)))
-    bmap_clear(&c->export_reject_map, rpe->old->rte.id);
+  rte n0 = RTE_COPY_VALID(new_best);
+  if (n0.src || o)
+    rt_notify_basic(c, net, n0.src ? &n0 : NULL, o);
 }

 void
-rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe)
+rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first)
 {
   struct channel *c = SKIP_BACK(struct channel, out_req, req);
-  rte n0;

-  if (rpe->new != rpe->old)
-    rt_notify_basic(c, net, RTE_COPY(rpe->new, &n0), RTE_OR_NULL(rpe->old));
+  rte *n = RTE_VALID_OR_NULL(first->new);
+  rte *o = RTE_VALID_OR_NULL(first->old);

-  /* Drop the old stored rejection if applicable.
-   * new->id == old->id happens when updating hostentries. */
-  if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id)))
-    bmap_clear(&c->export_reject_map, rpe->old->rte.id);
+  if (!n && !o)
+  {
+    channel_rpe_mark_seen(req, first);
+    return;
+  }
+
+  struct rte_src *src = n ? n->src : o->src;
+  struct rte_storage *new_latest = first->new;
+
+  RPE_WALK(first, rpe, src)
+  {
+    channel_rpe_mark_seen(req, rpe);
+    new_latest = rpe->new;
+  }
+
+  rte n0 = RTE_COPY_VALID(new_latest);
+  if (n0.src || o)
+    rt_notify_basic(c, net, n0.src ? &n0 : NULL, o);
 }

 void
@@ -1090,10 +1141,108 @@ rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pendin
   struct channel *c = SKIP_BACK(struct channel, out_req, req);

   for (uint i=0; i<count; i++)
+    if (rte_is_valid(feed[i]))
+    {
+      rte n0 = *feed[i];
+      rt_notify_basic(c, net, &n0, NULL);
+    }
+}
+
+void
+rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe)
+{
+  bmap_set(&hook->seq_map, rpe->seq);
+}
+
+struct rt_pending_export *
+rpe_next(struct rt_pending_export *rpe, struct rte_src *src)
+{
+  struct rt_pending_export *next = atomic_load_explicit(&rpe->next, memory_order_acquire);
+
+  if (!next)
+    return NULL;
+
+  if (!src)
+    return next;
+
+  while (rpe = next)
+    if (src == (rpe->new ? rpe->new->rte.src : rpe->old->rte.src))
+      return rpe;
+    else
+      next = atomic_load_explicit(&rpe->next, memory_order_acquire);
+
+  return NULL;
+}
+
+static struct rt_pending_export * rt_next_export_fast(struct rt_pending_export *last);
+static int
+rte_export(struct rt_table_export_hook *th, struct rt_pending_export *rpe)
+{
+  rtable *tab = RT_PUB(SKIP_BACK(struct rtable_private, exporter, th->table));
+  struct rt_export_hook *hook = &th->h;
+  if (bmap_test(&hook->seq_map, rpe->seq))
+    goto ignore; /* Seen already */
+
+  const net_addr *n = rpe->new_best ? rpe->new_best->rte.net : rpe->old_best->rte.net;
+
+  switch (hook->req->addr_mode)
+  {
+    case TE_ADDR_NONE:
+      break;
+
+    case TE_ADDR_IN:
+      if (!net_in_netX(n, hook->req->addr))
+        goto ignore;
+      break;
+
+    case TE_ADDR_EQUAL:
+      if (!net_equal(n, hook->req->addr))
+        goto ignore;
+      break;
+
+    case TE_ADDR_FOR:
+      bug("Continuous export of best prefix match not implemented yet.");
+
+    default:
+      bug("Strange table export address mode: %d", hook->req->addr_mode);
+  }
+
+  if (rpe->new)
+    hook->stats.updates_received++;
+  else
+    hook->stats.withdraws_received++;
+
+  if (hook->req->export_one)
+    hook->req->export_one(hook->req, n, rpe);
+  else if (hook->req->export_bulk)
   {
-    rte n0 = *feed[i];
-    rt_notify_basic(c, net, &n0, NULL);
+    net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n);
+    RT_LOCK(tab);
+    uint count = rte_feed_count(net);
+    rte **feed = NULL;
+    if (count)
+    {
+      feed = alloca(count * sizeof(rte *));
+      rte_feed_obtain(net, feed, count);
     }
+    RT_UNLOCK(tab);
+    hook->req->export_bulk(hook->req, n, rpe, feed, count);
+  }
+  else
+    bug("Export request must always provide an export method");
+
+ignore:
+  /* Get the next export if exists */
+  th->rpe_next = rt_next_export_fast(rpe);
+
+  /* The last block may be available to free */
+  int used = (PAGE_HEAD(th->rpe_next) != PAGE_HEAD(rpe));
+
+  /* Releasing this export for cleanup routine */
+  DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq);
+  atomic_store_explicit(&th->last_export, rpe, memory_order_release);
+
+  return used;
 }
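rte_export() above skips journal entries whose sequence number is already marked in seq_map, so aggregating walks and the per-entry path never process the same change twice. A standalone toy of that skip logic:

#include <stdint.h>
#include <stdio.h>

struct entry { uint64_t seq; const char *what; };

static uint64_t seen;   /* toy bitmap in place of BIRD's bmap */
#define MARK_SEEN(s) (seen |= UINT64_C(1) << (s))
#define WAS_SEEN(s)  ((seen >> (s)) & 1)

int main(void)
{
  struct entry journal[] = { {1, "update"}, {2, "withdraw"}, {3, "update"} };

  MARK_SEEN(2);   /* entry 2 was already handled by an aggregating walk */

  for (unsigned i = 0; i < 3; i++)
    if (!WAS_SEEN(journal[i].seq))
    {
      printf("processing seq %llu (%s)\n",
             (unsigned long long) journal[i].seq, journal[i].what);
      MARK_SEEN(journal[i].seq);
    }
  return 0;
}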
 /**
@@ -1128,91 +1277,262 @@ rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pendin
  * done outside of scope of rte_announce().
  */
 static void
-rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old,
+rte_announce(struct rtable_private *tab, net *net, struct rte_storage *new, struct rte_storage *old,
              struct rte_storage *new_best, struct rte_storage *old_best)
 {
-  if (!rte_is_valid(RTE_OR_NULL(new)))
-    new = NULL;
+  int new_best_valid = rte_is_valid(RTE_OR_NULL(new_best));
+  int old_best_valid = rte_is_valid(RTE_OR_NULL(old_best));

-  if (!rte_is_valid(RTE_OR_NULL(old)))
-    old = NULL;
+  if ((new == old) && (new_best == old_best))
+    return;

-  if (!rte_is_valid(RTE_OR_NULL(new_best)))
-    new_best = NULL;
+  if (new_best_valid)
+    new_best->rte.sender->stats.pref++;
+  if (old_best_valid)
+    old_best->rte.sender->stats.pref--;

-  if (!rte_is_valid(RTE_OR_NULL(old_best)))
-    old_best = NULL;
+  if (EMPTY_LIST(tab->exporter.e.hooks) && EMPTY_LIST(tab->exporter.pending))
+  {
+    /* No export hook and no pending exports to cleanup. We may free the route immediately. */
+    if (!old)
+      return;

-  if (!new && !old && !new_best && !old_best)
+    hmap_clear(&tab->id_map, old->rte.id);
+    rte_free(old);
     return;
+  }
+
+  /* Get the pending export structure */
+  struct rt_export_block *rpeb = NULL, *rpebsnl = NULL;
+  u32 end = 0;

-  if (new_best != old_best)
+  if (!EMPTY_LIST(tab->exporter.pending))
   {
-    if (new_best)
-      new_best->rte.sender->stats.pref++;
-    if (old_best)
-      old_best->rte.sender->stats.pref--;
+    rpeb = TAIL(tab->exporter.pending);
+    end = atomic_load_explicit(&rpeb->end, memory_order_relaxed);
+    if (end >= RT_PENDING_EXPORT_ITEMS)
+    {
+      ASSERT_DIE(end == RT_PENDING_EXPORT_ITEMS);
+      rpebsnl = rpeb;

-    if (tab->hostcache)
-      rt_notify_hostcache(tab, net);
+      rpeb = NULL;
+      end = 0;
+    }
+  }

-    if (!EMPTY_LIST(tab->flowspec_links))
-      rt_flowspec_notify(tab, net);
+  if (!rpeb)
+  {
+    rpeb = alloc_page();
+    *rpeb = (struct rt_export_block) {};
+    add_tail(&tab->exporter.pending, &rpeb->n);
   }

-  rt_schedule_notify(tab);
+  /* Fill the pending export */
+  struct rt_pending_export *rpe = &rpeb->export[rpeb->end];
+  *rpe = (struct rt_pending_export) {
+    .new = new,
+    .new_best = new_best,
+    .old = old,
+    .old_best = old_best,
+    .seq = tab->exporter.next_seq++,
+  };
+
+  DBGL("rte_announce: table=%s net=%N new=%p id %u from %s old=%p id %u from %s new_best=%p id %u old_best=%p id %u seq=%lu",
+       tab->name, net->n.addr,
+       new, new ? new->rte.id : 0, new ? new->rte.sender->req->name : NULL,
+       old, old ? old->rte.id : 0, old ? old->rte.sender->req->name : NULL,
+       new_best, old_best, rpe->seq);
+
+  ASSERT_DIE(atomic_fetch_add_explicit(&rpeb->end, 1, memory_order_release) == end);

-  struct rt_pending_export rpe = { .new = new, .old = old, .new_best = new_best, .old_best = old_best };

-  uint count = rte_feed_count(net);
-  rte **feed = NULL;
-  if (count)
+  if (rpebsnl)
   {
-    feed = alloca(count * sizeof(rte *));
-    rte_feed_obtain(net, feed, count);
+    _Bool f = 0;
+    ASSERT_DIE(atomic_compare_exchange_strong_explicit(&rpebsnl->not_last, &f, 1,
+          memory_order_release, memory_order_relaxed));
   }

-  struct rt_export_hook *eh;
-  WALK_LIST(eh, tab->exporter.hooks)
+  /* Append to the same-network squasher list */
+  if (net->last)
   {
-    if (eh->export_state == TES_STOP)
-      continue;
+    struct rt_pending_export *rpenull = NULL;
+    ASSERT_DIE(atomic_compare_exchange_strong_explicit(
+          &net->last->next, &rpenull, rpe,
+          memory_order_relaxed,
+          memory_order_relaxed));

-    switch (eh->req->addr_mode)
-    {
-      case TE_ADDR_NONE:
-        break;
+  }

-      case TE_ADDR_IN:
-        if (!net_in_netX(net->n.addr, eh->req->addr))
-          continue;
-        break;
+  net->last = rpe;

-      case TE_ADDR_EQUAL:
-        if (!net_equal(net->n.addr, eh->req->addr))
+  if (!net->first)
+    net->first = rpe;
+
+  if (tab->exporter.first == NULL)
+    tab->exporter.first = rpe;
+
+  rt_check_cork_high(tab);
+}
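The append in rte_announce() above relies on release/acquire ordering: the slot is filled first, then published by bumping `end`. A minimal single-writer model in portable C11 (block chaining and the not_last flag are omitted; names are invented):

#include <stdatomic.h>
#include <stdio.h>

#define N 128

struct item { unsigned long seq; };

static struct item slot[N];
static _Atomic unsigned end;

static void append(unsigned long seq)
{
  unsigned e = atomic_load_explicit(&end, memory_order_relaxed);
  slot[e] = (struct item) { .seq = seq };                    /* fill first... */
  atomic_store_explicit(&end, e + 1, memory_order_release);  /* ...publish after */
}

static unsigned reader_snapshot(void)
{
  /* acquire pairs with the release store: items below `end` are complete */
  return atomic_load_explicit(&end, memory_order_acquire);
}

int main(void)
{
  append(1); append(2);
  printf("visible items: %u\n", reader_snapshot());
  return 0;
}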
+static struct rt_pending_export *
+rt_next_export_fast(struct rt_pending_export *last)
+{
+  /* Get the whole export block and find our position in there. */
+  struct rt_export_block *rpeb = PAGE_HEAD(last);
+  u32 pos = (last - &rpeb->export[0]);
+  u32 end = atomic_load_explicit(&rpeb->end, memory_order_acquire);
+  ASSERT_DIE(pos < end);
+
+  /* Next is in the same block. */
+  if (++pos < end)
+    return &rpeb->export[pos];
+
+  /* There is another block. */
+  if (atomic_load_explicit(&rpeb->not_last, memory_order_acquire))
+  {
+    /* This is OK to do non-atomically because of the not_last flag. */
+    rpeb = NODE_NEXT(rpeb);
+    return &rpeb->export[0];
+  }
+
+  /* There is nothing more. */
+  return NULL;
+}
+
+static struct rt_pending_export *
+rt_next_export(struct rt_table_export_hook *hook, struct rt_table_exporter *tab)
+{
+  ASSERT_DIE(RT_IS_LOCKED(SKIP_BACK(struct rtable_private, exporter, tab)));
+
+  /* As the table is locked, it is safe to reload the last export pointer */
+  struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire);
+
+  /* It is still valid, let's reuse it */
+  if (last)
+    return rt_next_export_fast(last);
+
+  /* No, therefore we must process the table's first pending export */
+  else
+    return tab->first;
+}
+
+static inline void
+rt_send_export_event(struct rt_export_hook *hook)
+{
+  ev_send(hook->req->list, &hook->event);
+}
+
+static void
+rt_announce_exports(struct settle *s)
+{
+  RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, export_settle, s)), tab)
+    if (!EMPTY_LIST(tab->exporter.pending))
+    {
+      struct rt_export_hook *c; node *n;
+      WALK_LIST2(c, n, tab->exporter.e.hooks, n)
+      {
+        if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY)
          continue;
-        break;

-      case TE_ADDR_FOR:
-        bug("Continuos export of best prefix match not implemented yet.");
+        rt_send_export_event(c);
+      }
+    }
+}

-      default:
-        bug("Strange table export address mode: %d", eh->req->addr_mode);
+static void
+rt_kick_export_settle(struct rtable_private *tab)
+{
+  tab->export_settle.cf = tab->rr_counter ? tab->config->export_rr_settle : tab->config->export_settle;
+  settle_kick(&tab->export_settle, tab->loop);
+}
+
+static void
+rt_import_announce_exports(void *_hook)
+{
+  struct rt_import_hook *hook = _hook;
+  if (hook->import_state == TIS_CLEARED)
+  {
+    void (*stopped)(struct rt_import_request *) = hook->stopped;
+    struct rt_import_request *req = hook->req;
+
+    RT_LOCKED(hook->table, tab)
+    {
+      req->hook = NULL;
+
+      rt_trace(tab, D_EVENTS, "Hook %s stopped", req->name);
+      rem_node(&hook->n);
+      mb_free(hook);
+      rt_unlock_table(tab);
     }

-    if (new)
-      eh->stats.updates_received++;
-    else
-      eh->stats.withdraws_received++;
+    stopped(req);
+    return;
+  }

-    if (eh->req->export_one)
-      eh->req->export_one(eh->req, net->n.addr, &rpe);
-    else if (eh->req->export_bulk)
-      eh->req->export_bulk(eh->req, net->n.addr, &rpe, feed, count);
-    else
-      bug("Export request must always provide an export method");
+  rt_trace(hook->table, D_EVENTS, "Announcing exports after imports from %s", hook->req->name);
+  birdloop_flag(hook->table->loop, RTF_EXPORT);
+}
+
+static struct rt_pending_export *
+rt_last_export(struct rt_table_exporter *tab)
+{
+  struct rt_pending_export *rpe = NULL;
+
+  if (!EMPTY_LIST(tab->pending))
+  {
+    /* We'll continue processing exports from this export on */
+    struct rt_export_block *reb = TAIL(tab->pending);
+    ASSERT_DIE(reb->end);
+    rpe = &reb->export[reb->end - 1];
+  }
+
+  return rpe;
+}
+
+#define RT_EXPORT_BULK 1024
+
+static void
+rt_export_hook(void *_data)
+{
+  struct rt_table_export_hook *c = _data;
+  rtable *tab = SKIP_BACK(rtable, priv.exporter, c->table);
+
+  ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_READY);
+
+  if (!c->rpe_next)
+  {
+    RT_LOCK(tab);
+    c->rpe_next = rt_next_export(c, c->table);
+
+    if (!c->rpe_next)
+    {
+      rt_export_used(c->table, c->h.req->name, "done exporting");
+      RT_UNLOCK(tab);
+      return;
+    }
+
+    RT_UNLOCK(tab);
+  }
+
+  int used = 0;
+
+  /* Process the export */
+  for (uint i=0; i<RT_EXPORT_BULK; i++)
+  {
+    used += rte_export(c, c->rpe_next);
+
+    if (!c->rpe_next)
+      break;
   }
+
+  if (used)
+    RT_LOCKED(tab, _)
+      rt_export_used(c->table, c->h.req->name, "finished export bulk");
+
+  rt_send_export_event(&c->h);
 }
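rt_export_hook() above bounds each run to RT_EXPORT_BULK journal entries and then re-sends its own event, so one busy exporter cannot monopolize its loop. A standalone model of that batching, with a trivial stand-in for the event queue:

#include <stdio.h>

#define BULK 4

static int pending = 10;   /* exports waiting in the journal */

static void export_hook(void);

/* model: "sending an event" just runs the hook immediately */
static void send_event(void (*hook)(void)) { hook(); }

static void export_hook(void)
{
  for (int i = 0; i < BULK && pending; i++)
  {
    pending--;
    printf("exported one, %d left\n", pending);
  }

  if (pending)
    send_event(export_hook);   /* yield and continue in the next round */
}

int main(void)
{
  send_event(export_hook);
  return 0;
}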
+
 static inline int
 rte_validate(struct channel *ch, rte *e)
 {
@@ -1279,11 +1599,10 @@ rte_same(rte *x, rte *y)
 static inline int
 rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }

-static void
-rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src)
+static int
+rte_recalculate(struct rtable_private *table, struct rt_import_hook *c, net *net, rte *new, struct rte_src *src)
 {
   struct rt_import_request *req = c->req;
-  struct rtable *table = c->table;
   struct rt_import_stats *stats = &c->stats;
   struct rte_storage *old_best_stored = net->routes, *old_stored = NULL;
   rte *old_best = old_best_stored ? &old_best_stored->rte : NULL;
@@ -1313,17 +1632,16 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
     {
       if (!old->generation && !new->generation)
        bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u",
-           c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id);
+           c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id);

       log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u",
-           c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id);
+           c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id);
     }

          if (new && rte_same(old, &new_stored->rte))
            {
              /* No changes, ignore the new route and refresh the old one */
-
-             old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
+             old->stale_cycle = new->stale_cycle;

              if (!rte_is_filtered(new))
                {
@@ -1333,7 +1651,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
                  /* We need to free the already stored route here before returning */
                  rte_free(new_stored);
-                 return;
+                 return 0;
                }

          *before_old = (*before_old)->next;
@@ -1343,7 +1661,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
   if (!old && !new)
     {
       stats->withdraws_ignored++;
-      return;
+      return 0;
     }

   /* If rejected by import limit, we need to pretend there is no route */
@@ -1393,8 +1711,8 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
   /* If routes are not sorted, find the best route and move it on
      the first position. There are several optimized cases. */

-  if (src->proto->rte_recalculate &&
-      src->proto->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best))
+  if (src->owner->rte_recalculate &&
+      src->owner->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best))
     goto do_recalculate;

   if (new_stored && rte_better(&new_stored->rte, old_best))
@@ -1458,14 +1776,8 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
   if (new_stored)
     {
       new_stored->rte.lastmod = current_time();
-
-      if (!old)
-        {
-          new_stored->rte.id = hmap_first_zero(&table->id_map);
-          hmap_set(&table->id_map, new_stored->rte.id);
-        }
-      else
-        new_stored->rte.id = old->id;
+      new_stored->rte.id = hmap_first_zero(&table->id_map);
+      hmap_set(&table->id_map, new_stored->rte.id);
     }

   /* Log the route change */
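After this change, every newly stored route takes the first free id from the table's id map via hmap_first_zero(). A toy first-zero-bit allocator showing the reuse behaviour (32-bit word instead of BIRD's hmap):

#include <stdint.h>
#include <stdio.h>

static uint32_t id_map = (~0u << 16) | 0x2bu;   /* some ids already taken */

static unsigned id_alloc(void)
{
  for (unsigned i = 0; i < 32; i++)
    if (!((id_map >> i) & 1))
    {
      id_map |= 1u << i;   /* mark the id as taken */
      return i;
    }
  return ~0u;              /* exhausted in this toy model */
}

static void id_free(unsigned i) { id_map &= ~(1u << i); }

int main(void)
{
  unsigned a = id_alloc(), b = id_alloc();
  printf("allocated %u and %u\n", a, b);   /* first two zero bits: 2 and 4 */
  id_free(a);
  printf("reused %u\n", id_alloc());       /* 2 again */
  return 0;
}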
@@ -1485,41 +1797,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
   rte_announce(table, net, new_stored, old_stored,
               net->routes, old_best_stored);

-  if (!net->routes &&
-      (table->gc_counter++ >= table->config->gc_max_ops) &&
-      (table->gc_time + table->config->gc_min_time <= current_time()))
-    rt_schedule_prune(table);
-
-#if 0
-  /* Enable and reimplement these callbacks if anybody wants to use them */
-  if (old_ok && p->rte_remove)
-    p->rte_remove(net, old);
-  if (new_ok && p->rte_insert)
-    p->rte_insert(net, &new_stored->rte);
-#endif
-
-  if (old)
-    {
-      if (!new_stored)
-        hmap_clear(&table->id_map, old->id);
-
-      rte_free(old_stored);
-    }
-}
-
-static int rte_update_nest_cnt;         /* Nesting counter to allow recursive updates */
-
-static inline void
-rte_update_lock(void)
-{
-  rte_update_nest_cnt++;
-}
-
-static inline void
-rte_update_unlock(void)
-{
-  if (!--rte_update_nest_cnt)
-    lp_flush(rte_update_pool);
+  return 1;
 }

 int
@@ -1571,7 +1849,6 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
   const struct filter *filter = c->in_filter;
   struct channel_import_stats *stats = &c->import_stats;

-  rte_update_lock();
   if (new)
     {
       new->net = n;
@@ -1622,7 +1899,6 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
       ea_free(a);
     }

-  rte_update_unlock();
 }

 void
@@ -1632,110 +1908,90 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt
   if (!hook)
     return;

-  net *nn;
-  if (new)
+  RT_LOCKED(hook->table, tab)
+  {
+    net *nn;
+    if (new)
     {
       /* Use the actual struct network, not the dummy one */
-      nn = net_get(hook->table, n);
+      nn = net_get(tab, n);
       new->net = nn->n.addr;
       new->sender = hook;
+
+      /* Set the stale cycle */
+      new->stale_cycle = hook->stale_set;
     }
-  else if (!(nn = net_find(hook->table, n)))
+    else if (!(nn = net_find(tab, n)))
     {
       req->hook->stats.withdraws_ignored++;
-      return;
+      RT_RETURN(tab);
     }

-  /* And recalculate the best route */
-  rte_recalculate(hook, nn, new, src);
-}
-
-/* Independent call to rte_announce(), used from next hop
-   recalculation, outside of rte_update(). new must be non-NULL */
-static inline void
-rte_announce_i(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old,
-              struct rte_storage *new_best, struct rte_storage *old_best)
-{
-  rte_update_lock();
-  rte_announce(tab, net, new, old, new_best, old_best);
-  rte_update_unlock();
-}
-
-static inline void
-rte_discard(net *net, rte *old)        /* Non-filtered route deletion, used during garbage collection */
-{
-  rte_update_lock();
-  rte_recalculate(old->sender, net, NULL, old->src);
-  rte_update_unlock();
-}
-
-/* Modify existing route by protocol hook, used for long-lived graceful restart */
-static inline void
-rte_modify(net *net, rte *old)
-{
-  rte_update_lock();
-
-  rte *new = old->sender->req->rte_modify(old, rte_update_pool);
-  if (new != old)
-  {
-    if (new)
-      new->flags = old->flags & ~REF_MODIFY;
-
-    rte_recalculate(old->sender, net, new, old->src);
+    /* Recalculate the best route */
+    if (rte_recalculate(tab, hook, nn, new, src))
+      ev_send(req->list, &hook->announce_event);
   }
-
-  rte_update_unlock();
 }

 /* Check rtable for best route to given net whether it would be exported do p */
 int
-rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter)
+rt_examine(rtable *tp, net_addr *a, struct channel *c, const struct filter *filter)
 {
-  net *n = net_find(t, a);
+  rte rt = {};

-  if (!n || !rte_is_valid(RTE_OR_NULL(n->routes)))
-    return 0;
+  RT_LOCKED(tp, t)
+  {
+    net *n = net_find(t, a);
+    if (n)
+      rt = RTE_COPY_VALID(n->routes);
+  }

-  rte rt = n->routes->rte;
+  if (!rt.src)
+    return 0;

-  rte_update_lock();
-
-  /* Rest is stripped down export_filter() */
   int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0;
   if (v == RIC_PROCESS)
     v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT);

-  rte_update_unlock();
-
   return v > 0;
 }

 static void
-rt_table_export_done(struct rt_export_hook *hook)
+rt_table_export_done(void *hh)
 {
-  struct rt_exporter *re = hook->table;
-  struct rtable *tab = SKIP_BACK(struct rtable, exporter, re);
+  struct rt_table_export_hook *hook = hh;
+  struct rt_export_request *req = hook->h.req;
+  void (*stopped)(struct rt_export_request *) = hook->h.stopped;
+  rtable *t = SKIP_BACK(rtable, priv.exporter, hook->table);
+
+  RT_LOCKED(t, tab)
+  {
+    DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count);
+
+    /* Drop pending exports */
+    rt_export_used(&tab->exporter, hook->h.req->name, "stopped");
+
+    /* Do the common code; this frees the hook */
+    rt_export_stopped(&hook->h);
+  }

-  rt_unlock_table(tab);
-  DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count);
+  /* Report the channel as stopped. */
+  CALL(stopped, req);
+
+  /* Unlock the table; this may free it */
+  rt_unlock_table(t);
 }

-static void
-rt_export_stopped(void *data)
+void
+rt_export_stopped(struct rt_export_hook *hook)
 {
-  struct rt_export_hook *hook = data;
-  struct rt_exporter *tab = hook->table;
+  /* Unlink from the request */
+  hook->req->hook = NULL;

   /* Unlist */
   rem_node(&hook->n);

-  /* Reporting the channel as stopped. */
-  hook->stopped(hook->req);
-
-  /* Reporting the hook as finished. */
-  CALL(tab->done, hook);
-
-  /* Freeing the hook together with its coroutine. */
+  /* Free the hook itself together with its pool */
   rfree(hook->pool);
 }

@@ -1745,36 +2001,38 @@ rt_set_import_state(struct rt_import_hook *hook, u8 state)
   hook->last_state_change = current_time();
   hook->import_state = state;

-  if (hook->req->log_state_change)
-    hook->req->log_state_change(hook->req, state);
+  CALL(hook->req->log_state_change, hook->req, state);
 }

 void
 rt_set_export_state(struct rt_export_hook *hook, u8 state)
 {
   hook->last_state_change = current_time();
-  hook->export_state = state;
+  u8 old = atomic_exchange_explicit(&hook->export_state, state, memory_order_release);

-  if (hook->req->log_state_change)
-    hook->req->log_state_change(hook->req, state);
+  if (old != state)
+    CALL(hook->req->log_state_change, hook->req, state);
 }

 void
-rt_request_import(rtable *tab, struct rt_import_request *req)
+rt_request_import(rtable *t, struct rt_import_request *req)
 {
-  rt_lock_table(tab);
+  RT_LOCKED(t, tab)
+  {
+    rt_lock_table(tab);

-  struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook));
+    struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook));

-  DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count);
+    hook->announce_event = (event) { .hook = rt_import_announce_exports, .data = hook };

-  hook->req = req;
-  hook->table = tab;
+    DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count);

-  rt_set_import_state(hook, TIS_UP);
+    hook->req = req;
+    hook->table = t;

-  hook->n = (node) {};
-  add_tail(&tab->imports, &hook->n);
+    rt_set_import_state(hook, TIS_UP);
+    add_tail(&tab->imports, &hook->n);
+  }
 }

 void
@@ -1783,23 +2041,77 @@ rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_r
   ASSERT_DIE(req->hook);
   struct rt_import_hook *hook = req->hook;

-  rt_schedule_prune(hook->table);
+  RT_LOCKED(hook->table, tab)
+  {
+    rt_schedule_prune(tab);
+    rt_set_import_state(hook, TIS_STOP);
+    hook->stopped = stopped;

-  rt_set_import_state(hook, TIS_STOP);
+    if (hook->stale_set != hook->stale_pruned)
+      tab->rr_counter -= (hook->stale_set - hook->stale_pruned - 1);
+    else
+      tab->rr_counter++;

-  hook->stopped = stopped;
+    hook->stale_set = hook->stale_pruned = hook->stale_pruning = hook->stale_valid = 0;
+  }
 }

-static struct rt_export_hook *
-rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req)
+static void rt_table_export_start_feed(struct rtable_private *tab, struct rt_table_export_hook *hook);
+static void
+rt_table_export_uncork(void *_hook)
 {
-  rtable *tab = SKIP_BACK(rtable, exporter, re);
+  ASSERT_DIE(birdloop_inside(&main_birdloop));
+
+  struct rt_table_export_hook *hook = _hook;
+  struct birdloop *loop = hook->h.req->list->loop;
+
+  if (loop != &main_birdloop)
+    birdloop_enter(loop);
+
+  u8 state;
+  switch (state = atomic_load_explicit(&hook->h.export_state, memory_order_relaxed))
+  {
+    case TES_HUNGRY:
+      RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, hook->table)), tab)
+        rt_table_export_start_feed(tab, hook);
+      break;
+    case TES_STOP:
+      rt_stop_export_common(&hook->h);
+      break;
+    default:
+      bug("Uncorking a table export in a strange state: %u", state);
+  }
+
+  if (loop != &main_birdloop)
+    birdloop_leave(loop);
+}
+
+static void
+rt_table_export_start_locked(struct rtable_private *tab, struct rt_export_request *req)
+{
+  struct rt_exporter *re = &tab->exporter.e;
   rt_lock_table(tab);

-  pool *p = rp_new(tab->rp, "Export hook");
-  struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook));
-  hook->pool = p;
-  hook->lp = lp_new_default(p);
+  req->hook = rt_alloc_export(re, sizeof(struct rt_table_export_hook));
+  req->hook->req = req;
+
+  struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, req->hook);
+  hook->h.event = (event) {
+    .hook = rt_table_export_uncork,
+    .data = hook,
+  };
+
+  if (rt_cork_check(&hook->h.event))
+    rt_set_export_state(&hook->h, TES_HUNGRY);
+  else
+    rt_table_export_start_feed(tab, hook);
+}
+
+static void
+rt_table_export_start_feed(struct rtable_private *tab, struct rt_table_export_hook *hook)
+{
+  struct rt_exporter *re = &tab->exporter.e;
+  struct rt_export_request *req = hook->h.req;

   /* stats zeroed by mb_allocz */
   switch (req->addr_mode)
@@ -1807,24 +2119,25 @@ rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req)
     case TE_ADDR_IN:
       if (tab->trie && net_val_match(tab->addr_type, NB_IP))
       {
-        hook->walk_state = mb_allocz(p, sizeof (struct f_trie_walk_state));
+        hook->walk_state = mb_allocz(hook->h.pool, sizeof (struct f_trie_walk_state));
        hook->walk_lock = rt_lock_trie(tab);
        trie_walk_init(hook->walk_state, tab->trie, req->addr);
-        hook->event = ev_new_init(p, rt_feed_by_trie, hook);
+        hook->h.event.hook = rt_feed_by_trie;
+        hook->walk_last.type = 0;
        break;
       }
       /* fall through */
     case TE_ADDR_NONE:
       FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib);
-      hook->event = ev_new_init(p, rt_feed_by_fib, hook);
+      hook->h.event.hook = rt_feed_by_fib;
       break;

     case TE_ADDR_EQUAL:
-      hook->event = ev_new_init(p, rt_feed_equal, hook);
+      hook->h.event.hook = rt_feed_equal;
       break;

     case TE_ADDR_FOR:
-      hook->event = ev_new_init(p, rt_feed_for, hook);
+      hook->h.event.hook = rt_feed_for;
       break;

     default:
@@ -1833,72 +2146,137 @@ rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req)

   DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count);

-  return hook;
+  struct rt_pending_export *rpe = rt_last_export(hook->table);
+  DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0);
+  atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed);
+
+  rt_init_export(re, req->hook);
+}
+
+static void
+rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req)
+{
+  RT_LOCKED(SKIP_BACK(rtable, priv.exporter.e, re), tab)
+    rt_table_export_start_locked(tab, req);
+}
+
+void rt_request_export(rtable *t, struct rt_export_request *req)
+{
+  RT_LOCKED(t, tab)
+    rt_table_export_start_locked(tab, req); /* Is locked inside */
 }

 void
-rt_request_export(struct rt_exporter *re, struct rt_export_request *req)
+rt_request_export_other(struct rt_exporter *re, struct rt_export_request *req)
 {
-  struct rt_export_hook *hook = req->hook = re->start(re, req);
+  return re->class->start(re, req);
+}

-  hook->req = req;
+struct rt_export_hook *
+rt_alloc_export(struct rt_exporter *re, uint size)
+{
+  pool *p = rp_new(re->rp, "Export hook");
+  struct rt_export_hook *hook = mb_allocz(p, size);
+
+  hook->pool = p;
   hook->table = re;

+  return hook;
+}
+
+void
+rt_init_export(struct rt_exporter *re, struct rt_export_hook *hook)
+{
+  hook->event.data = hook;
+
+  bmap_init(&hook->seq_map, hook->pool, 1024);
+
   hook->n = (node) {};
   add_tail(&re->hooks, &hook->n);

   /* Regular export */
   rt_set_export_state(hook, TES_FEEDING);
-  ev_schedule_work(hook->event);
+  rt_send_export_event(hook);
 }

-static void
-rt_table_export_stop(struct rt_export_hook *hook)
+static int
+rt_table_export_stop_locked(struct rt_export_hook *hh)
 {
-  rtable *tab = SKIP_BACK(rtable, exporter, hook->table);
+  struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, hh);
+  struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter, hook->table);

-  if (hook->export_state != TES_FEEDING)
-    return;
-
-  switch (hook->req->addr_mode)
+  switch (atomic_load_explicit(&hh->export_state, memory_order_relaxed))
   {
-    case TE_ADDR_IN:
-      if (hook->walk_lock)
+    case TES_HUNGRY:
+      return 0;
+    case TES_FEEDING:
+      switch (hh->req->addr_mode)
       {
-        rt_unlock_trie(tab, hook->walk_lock);
-        hook->walk_lock = NULL;
-        mb_free(hook->walk_state);
-        hook->walk_state = NULL;
-        break;
+        case TE_ADDR_IN:
+          if (hook->walk_lock)
+          {
+            rt_unlock_trie(tab, hook->walk_lock);
+            hook->walk_lock = NULL;
+            mb_free(hook->walk_state);
+            hook->walk_state = NULL;
+            break;
+          }
+          /* fall through */
+        case TE_ADDR_NONE:
+          fit_get(&tab->fib, &hook->feed_fit);
+          break;
       }
-      /* fall through */
-    case TE_ADDR_NONE:
-      fit_get(&tab->fib, &hook->feed_fit);
-      break;
   }
+
+  return 1;
+}
+
+static void
+rt_table_export_stop(struct rt_export_hook *hh)
+{
+  struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, hh);
+  int ok = 0;
+  rtable *t = SKIP_BACK(rtable, priv.exporter, hook->table);
+  if (RT_IS_LOCKED(t))
+    ok = rt_table_export_stop_locked(hh);
+  else
+    RT_LOCKED(t, tab)
+      ok = rt_table_export_stop_locked(hh);
+
+  if (ok)
+    rt_stop_export_common(hh);
+  else
+    rt_set_export_state(&hook->h, TES_STOP);
 }
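The stop path around here dispatches through hook->table->class, i.e. a struct of function pointers, so the generic code works for table exporters and auxiliary exporters alike. A minimal standalone illustration of that C "vtable" shape (all names invented):

#include <stdio.h>

struct exporter;

struct exporter_class {
  void (*stop)(struct exporter *);
  void (*done)(struct exporter *);
};

struct exporter { const struct exporter_class *class; const char *name; };

static void table_stop(struct exporter *e) { printf("%s: table stop\n", e->name); }
static void table_done(struct exporter *e) { printf("%s: table done\n", e->name); }

static const struct exporter_class table_class = { table_stop, table_done };

static void generic_stop(struct exporter *e)
{
  /* generic code never needs to know which exporter kind it is stopping */
  if (e->class->stop)
    e->class->stop(e);
  e->class->done(e);
}

int main(void)
{
  struct exporter e = { &table_class, "master4" };
  generic_stop(&e);
  return 0;
}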
 void
 rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *))
 {
+  ASSERT_DIE(birdloop_inside(req->list->loop));
   ASSERT_DIE(req->hook);
   struct rt_export_hook *hook = req->hook;

-  /* Cancel the feeder event */
-  ev_postpone(hook->event);
-
-  /* Stop feeding from the exporter */
-  CALL(hook->table->stop, hook);
-
-  /* Reset the event as the stopped event */
-  hook->event->hook = rt_export_stopped;
+  /* Set the stopped callback */
   hook->stopped = stopped;

+  /* Run the stop code */
+  if (hook->table->class->stop)
+    hook->table->class->stop(hook);
+  else
+    rt_stop_export_common(hook);
+}
+
+void
+rt_stop_export_common(struct rt_export_hook *hook)
+{
   /* Update export state */
   rt_set_export_state(hook, TES_STOP);

+  /* Reset the event as the stopped event */
+  hook->event.hook = hook->table->class->done;
+
   /* Run the stopped event */
-  ev_schedule(hook->event);
+  rt_send_export_event(hook);
 }

 /**
@@ -1916,15 +2294,45 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r
  * flag in rt_refresh_end() and then removing such routes in the prune loop.
  */
 void
-rt_refresh_begin(rtable *t, struct rt_import_request *req)
+rt_refresh_begin(struct rt_import_request *req)
 {
-  FIB_WALK(&t->fib, net, n)
-    {
-      for (struct rte_storage *e = n->routes; e; e = e->next)
-        if (e->rte.sender == req->hook)
-          e->rte.flags |= REF_STALE;
-    }
-  FIB_WALK_END;
+  struct rt_import_hook *hook = req->hook;
+  ASSERT_DIE(hook);
+  ASSERT_DIE(hook->stale_set == hook->stale_valid);
+
+  RT_LOCKED(hook->table, tab)
+  {
+
+  /* If the pruning routine is too slow */
+  if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid)
+      || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128))
+  {
+    log(L_WARN "Route refresh flood in table %s", hook->table->name);
+    FIB_WALK(&tab->fib, net, n)
+    {
+      for (struct rte_storage *e = n->routes; e; e = e->next)
+        if (e->rte.sender == req->hook)
+          e->rte.stale_cycle = 0;
+    }
+    FIB_WALK_END;
+    tab->rr_counter -= (hook->stale_set - hook->stale_pruned - 1);
+    hook->stale_set = 1;
+    hook->stale_valid = 0;
+    hook->stale_pruned = 0;
+  }
+  /* Setting a new value of the stale modifier */
+  else if (!++hook->stale_set)
+  {
+    /* Let's reserve the stale_cycle zero value for always-invalid routes */
+    hook->stale_set = 1;
+    hook->stale_valid = 0;
+    tab->rr_counter++;
+  }
+
+  if (req->trace_routes & D_STATES)
+    log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set);
+
+  }
 }

 /**
@@ -1936,43 +2344,21 @@ rt_refresh_begin(rtable *t, struct rt_import_request *req)
  * hook. See rt_refresh_begin() for description of refresh cycles.
  */
 void
-rt_refresh_end(rtable *t, struct rt_import_request *req)
+rt_refresh_end(struct rt_import_request *req)
 {
-  int prune = 0;
-
-  FIB_WALK(&t->fib, net, n)
-    {
-      for (struct rte_storage *e = n->routes; e; e = e->next)
-        if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE))
-          {
-            e->rte.flags |= REF_DISCARD;
-            prune = 1;
-          }
-    }
-  FIB_WALK_END;
-
-  if (prune)
-    rt_schedule_prune(t);
-}
+  struct rt_import_hook *hook = req->hook;
+  ASSERT_DIE(hook);

-void
-rt_modify_stale(rtable *t, struct rt_import_request *req)
-{
-  int prune = 0;
+  RT_LOCKED(hook->table, tab)
+  {
+    hook->stale_valid++;
+    ASSERT_DIE(hook->stale_set == hook->stale_valid);

-  FIB_WALK(&t->fib, net, n)
-    {
-      for (struct rte_storage *e = n->routes; e; e = e->next)
-        if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE) && !(e->rte.flags & REF_FILTERED))
-          {
-            e->rte.flags |= REF_MODIFY;
-            prune = 1;
-          }
-    }
-  FIB_WALK_END;
+    rt_schedule_prune(tab);

-  if (prune)
-    rt_schedule_prune(t);
+    if (req->trace_routes & D_STATES)
+      log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid);
+  }
 }
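The refresh rework above replaces per-route REF_STALE flags with per-hook stale_set/stale_valid/stale_pruned counters: routes remember the cycle they were advertised in, and anything from an older cycle becomes prunable once the refresh ends. A standalone toy of the counter logic (wraparound handling and the flood fallback are omitted):

#include <stdio.h>

struct hook { unsigned char stale_set, stale_valid; };

static void refresh_begin(struct hook *h) { h->stale_set++; }
static void refresh_end(struct hook *h)   { h->stale_valid++; }

static int route_is_stale(const struct hook *h, unsigned char route_cycle)
{
  /* valid routes carry the current cycle; older ones await pruning */
  return route_cycle != h->stale_set;
}

int main(void)
{
  struct hook h = { 1, 1 };      /* cycle 1 is fully advertised */
  unsigned char r_old = 1;

  refresh_begin(&h);             /* re-advertisement starts cycle 2 */
  unsigned char r_new = h.stale_set;
  refresh_end(&h);

  printf("old stale=%d new stale=%d\n",
         route_is_stale(&h, r_old), route_is_stale(&h, r_new));
  return 0;
}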
" (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); @@ -2010,6 +2399,8 @@ rt_dump(rtable *t) } FIB_WALK_END; debug("\n"); + + } } /** @@ -2031,11 +2422,14 @@ rt_dump_all(void) } void -rt_dump_hooks(rtable *tab) +rt_dump_hooks(rtable *tp) { + RT_LOCKED(tp, tab) + { + debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->deleted ? " (deleted)" : ""); - debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", - tab->nhu_state, tab->hcu_scheduled, tab->use_count, tab->rt_count); + debug(" nhu_state=%u use_count=%d rt_count=%u\n", + tab->nhu_state, tab->use_count, tab->rt_count); debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); @@ -2049,15 +2443,18 @@ rt_dump_hooks(rtable *tab) ih->last_state_change, ih->import_state, ih->stopped); } - struct rt_export_hook *eh; - WALK_LIST(eh, tab->exporter.hooks) + struct rt_table_export_hook *eh; + WALK_LIST(eh, tab->exporter.e.hooks) { - eh->req->dump_req(eh->req); + eh->h.req->dump_req(eh->h.req); debug(" Export hook %p requested by %p:" - " refeed_pending=%u last_state_change=%t export_state=%u stopped=%p\n", - eh, eh->req, eh->refeed_pending, eh->last_state_change, eh->export_state, eh->stopped); + " refeed_pending=%u last_state_change=%t export_state=%u\n", + eh, eh->h.req, eh->refeed_pending, eh->h.last_state_change, + atomic_load_explicit(&eh->h.export_state, memory_order_relaxed)); } debug("\n"); + + } } void @@ -2076,206 +2473,236 @@ rt_dump_hooks_all(void) } static inline void -rt_schedule_hcu(rtable *tab) -{ - if (tab->hcu_scheduled) - return; - - tab->hcu_scheduled = 1; - ev_schedule(tab->rt_event); -} - -static inline void -rt_schedule_nhu(rtable *tab) +rt_schedule_nhu(struct rtable_private *tab) { - if (tab->nhu_state == NHU_CLEAN) - ev_schedule(tab->rt_event); - - /* state change: - * NHU_CLEAN -> NHU_SCHEDULED - * NHU_RUNNING -> NHU_DIRTY - */ - tab->nhu_state |= NHU_SCHEDULED; + if (tab->nhu_corked) + { + if (!(tab->nhu_corked & NHU_SCHEDULED)) + tab->nhu_corked |= NHU_SCHEDULED; + } + else if (!(tab->nhu_state & NHU_SCHEDULED)) + { + rt_trace(tab, D_EVENTS, "Scheduling NHU"); + + /* state change: + * NHU_CLEAN -> NHU_SCHEDULED + * NHU_RUNNING -> NHU_DIRTY + */ + if ((tab->nhu_state |= NHU_SCHEDULED) == NHU_SCHEDULED) + birdloop_flag(tab->loop, RTF_NHU); + } } void -rt_schedule_prune(rtable *tab) +rt_schedule_prune(struct rtable_private *tab) { if (tab->prune_state == 0) - ev_schedule(tab->rt_event); + birdloop_flag(tab->loop, RTF_CLEANUP); /* state change 0->1, 2->3 */ tab->prune_state |= 1; } - static void -rt_event(void *ptr) +rt_export_used(struct rt_table_exporter *e, const char *who, const char *why) { - rtable *tab = ptr; + struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter, e); + ASSERT_DIE(RT_IS_LOCKED(tab)); - rt_lock_table(tab); + rt_trace(tab, D_EVENTS, "Export cleanup requested by %s %s", who, why); - if (tab->hcu_scheduled) - rt_update_hostcache(tab); + if (tab->export_used) + return; - if (tab->nhu_state) - rt_next_hop_update(tab); + tab->export_used = 1; + birdloop_flag(tab->loop, RTF_CLEANUP); +} - if (tab->prune_state) - rt_prune_table(tab); +static void +rt_flag_handler(struct birdloop_flag_handler *fh, u32 flags) +{ + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, fh, fh)), tab) + { + ASSERT_DIE(birdloop_inside(tab->loop)); + rt_lock_table(tab); - rt_unlock_table(tab); -} + if (flags & RTF_NHU) + rt_next_hop_update(tab); + if (flags & RTF_EXPORT) + rt_kick_export_settle(tab); -static 
-rt_settled_time(rtable *tab)
-{
-  ASSUME(tab->base_settle_time != 0);
+    if (flags & RTF_CLEANUP)
+    {
+      if (tab->export_used)
+        rt_export_cleanup(tab);
+
+      if (tab->prune_state)
+        rt_prune_table(tab);
+    }

-  return MIN(tab->last_rt_change + tab->config->min_settle_time,
-             tab->base_settle_time + tab->config->max_settle_time);
+    rt_unlock_table(tab);
+  }
 }

 static void
-rt_settle_timer(timer *t)
+rt_prune_timer(timer *t)
 {
-  rtable *tab = t->data;
-
-  if (!tab->base_settle_time)
-    return;
+  RT_LOCKED((rtable *) t->data, tab)
+    if (tab->gc_counter >= tab->config->gc_threshold)
+      rt_schedule_prune(tab);
+}

-  btime settled_time = rt_settled_time(tab);
-  if (current_time() < settled_time)
-  {
-    tm_set(tab->settle_timer, settled_time);
+static void
+rt_kick_prune_timer(struct rtable_private *tab)
+{
+  /* Return if prune is already scheduled */
+  if (tm_active(tab->prune_timer) || (tab->prune_state & 1))
     return;
-  }
-
-  /* Settled */
-  tab->base_settle_time = 0;

-  struct rt_subscription *s;
-  WALK_LIST(s, tab->subscribers)
-    s->hook(s);
+  /* Randomize GC period to +/- 50% */
+  btime gc_period = tab->config->gc_period;
+  gc_period = (gc_period / 2) + (random_u32() % (uint) gc_period);
+  tm_start_in(tab->prune_timer, gc_period, tab->loop);
 }

+
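rt_kick_prune_timer() above randomizes the GC period as period/2 + (random % period). A quick standalone check that this lands uniformly in [period/2, 3*period/2), so many tables do not prune in lockstep:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
  unsigned period = 10;   /* configured GC period, illustrative units */

  for (int i = 0; i < 5; i++)
  {
    unsigned jittered = period / 2 + (unsigned) rand() % period;
    printf("run %d: prune in %u\n", i, jittered);   /* always in [5, 15) */
  }
  return 0;
}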
*/ + if (o != RTE_VALID_OR_NULL(new_best)) + rt_schedule_nhu(dst); - rt_kick_settle_timer(tab); + RT_UNLOCK(dst_pub); } -void -rt_subscribe(rtable *tab, struct rt_subscription *s) +static void +rt_flowspec_dump_req(struct rt_export_request *req) { - s->tab = tab; - rt_lock_table(tab); - DBG("rt_subscribe(%s)\n", tab->name); - add_tail(&tab->subscribers, &s->n); + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req); + debug(" Flowspec link for table %s (%p)\n", ln->dst->name, req); } -void -rt_unsubscribe(struct rt_subscription *s) +static void +rt_flowspec_log_state_change(struct rt_export_request *req, u8 state) { - rem_node(&s->n); - rt_unlock_table(s->tab); + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req); + rt_trace(ln->dst, D_STATES, "Flowspec link from %s export state changed to %s", + ln->src->name, rt_export_state_name(state)); } static struct rt_flowspec_link * -rt_flowspec_find_link(rtable *src, rtable *dst) +rt_flowspec_find_link(struct rtable_private *src, rtable *dst) { - struct rt_flowspec_link *ln; - WALK_LIST(ln, src->flowspec_links) - if ((ln->src == src) && (ln->dst == dst)) - return ln; + struct rt_table_export_hook *hook; node *n; + WALK_LIST2(hook, n, src->exporter.e.hooks, h.n) + switch (atomic_load_explicit(&hook->h.export_state, memory_order_acquire)) + { + case TES_HUNGRY: + case TES_FEEDING: + case TES_READY: + if (hook->h.req->export_one == rt_flowspec_export_one) + { + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, hook->h.req); + if (ln->dst == dst) + return ln; + } + } return NULL; } void -rt_flowspec_link(rtable *src, rtable *dst) +rt_flowspec_link(rtable *src_pub, rtable *dst_pub) { - ASSERT(rt_is_ip(src)); - ASSERT(rt_is_flow(dst)); + ASSERT(rt_is_ip(src_pub)); + ASSERT(rt_is_flow(dst_pub)); - struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); + int lock_dst = 0; - if (!ln) + RT_LOCKED(src_pub, src) { - rt_lock_table(src); - rt_lock_table(dst); + struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst_pub); - ln = mb_allocz(src->rp, sizeof(struct rt_flowspec_link)); - ln->src = src; - ln->dst = dst; - add_tail(&src->flowspec_links, &ln->n); + if (!ln) + { + pool *p = src->rp; + ln = mb_allocz(p, sizeof(struct rt_flowspec_link)); + ln->src = src_pub; + ln->dst = dst_pub; + ln->req = (struct rt_export_request) { + .name = mb_sprintf(p, "%s.flowspec.notifier", dst_pub->name), + .list = &global_work_list, + .trace_routes = src->config->debug, + .dump_req = rt_flowspec_dump_req, + .log_state_change = rt_flowspec_log_state_change, + .export_one = rt_flowspec_export_one, + }; + + rt_table_export_start_locked(src, &ln->req); + + lock_dst = 1; + } + + ln->uc++; } - ln->uc++; + if (lock_dst) + rt_lock_table(dst_pub); } -void -rt_flowspec_unlink(rtable *src, rtable *dst) +static void +rt_flowspec_link_stopped(struct rt_export_request *req) { - struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); - - ASSERT(ln && (ln->uc > 0)); + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req); + rtable *dst = ln->dst; - ln->uc--; - - if (!ln->uc) - { - rem_node(&ln->n); - mb_free(ln); - - rt_unlock_table(src); - rt_unlock_table(dst); - } + mb_free(ln); + rt_unlock_table(dst); } -static void -rt_flowspec_notify(rtable *src, net *net) +void +rt_flowspec_unlink(rtable *src, rtable *dst) { - /* Only IP tables are src links */ - ASSERT(rt_is_ip(src)); - struct rt_flowspec_link *ln; - WALK_LIST(ln, src->flowspec_links) + RT_LOCKED(src, t) { - rtable *dst = ln->dst; 
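
For context on the hunks around here: the per-(src,dst) flowspec link is now reference-counted; the first rt_flowspec_link() call allocates the link and starts its export request in the src table, and the last rt_flowspec_unlink() stops that request, with the dst table unlocked only afterwards in rt_flowspec_link_stopped(). A minimal pairing sketch follows; only rt_flowspec_link()/rt_flowspec_unlink() are taken from this patch, the caller is hypothetical:

/* Hypothetical start/stop pairing for the link API in this hunk */
static void
my_flowspec_user_start(rtable *base_ip, rtable *flow)
{
  /* First user allocates the link and its export request;
   * later users only bump the use count under the src table lock. */
  rt_flowspec_link(base_ip, flow);
}

static void
my_flowspec_user_stop(rtable *base_ip, rtable *flow)
{
  /* Last user triggers rt_stop_export(); the dst table stays
   * locked until rt_flowspec_link_stopped() runs. */
  rt_flowspec_unlink(base_ip, flow);
}
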
- ASSERT(rt_is_flow(dst)); + ln = rt_flowspec_find_link(t, dst); - /* No need to inspect it further if recalculation is already active */ - if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY)) - continue; + ASSERT(ln && (ln->uc > 0)); - if (trie_match_net(dst->flowspec_trie, net->n.addr)) - rt_schedule_nhu(dst); + if (!--ln->uc) + rt_stop_export(&ln->req, rt_flowspec_link_stopped); } } static void -rt_flowspec_reset_trie(rtable *tab) +rt_flowspec_reset_trie(struct rtable_private *tab) { linpool *lp = tab->flowspec_trie->lp; int ipv4 = tab->flowspec_trie->ipv4; @@ -2288,14 +2715,13 @@ rt_flowspec_reset_trie(rtable *tab) static void rt_free(resource *_r) { - rtable *r = (rtable *) _r; + struct rtable_private *r = SKIP_BACK(struct rtable_private, r, _r); + + DOMAIN_FREE(rtable, r->lock); DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); - if (r->internal) - return; - r->config->table = NULL; rem_node(&r->n); @@ -2306,7 +2732,6 @@ rt_free(resource *_r) fib_free(&r->fib); hmap_free(&r->id_map); rfree(r->rt_event); - rfree(r->settle_timer); mb_free(r); */ } @@ -2314,26 +2739,44 @@ rt_free(resource *_r) static void rt_res_dump(resource *_r) { - rtable *r = (rtable *) _r; + struct rtable_private *r = SKIP_BACK(struct rtable_private, r, _r); + debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n", r->name, net_label[r->addr_type], r->rt_count, r->use_count); } static struct resclass rt_class = { .name = "Routing table", - .size = sizeof(struct rtable), + .size = sizeof(rtable), .free = rt_free, .dump = rt_res_dump, .lookup = NULL, .memsize = NULL, }; +static const struct rt_exporter_class rt_table_exporter_class = { + .start = rt_table_export_start, + .stop = rt_table_export_stop, + .done = rt_table_export_done, +}; + +void +rt_exporter_init(struct rt_exporter *e) +{ + init_list(&e->hooks); +} + +static struct idm rtable_idm; +uint rtable_max_id = 0; + rtable * rt_setup(pool *pp, struct rtable_config *cf) { + ASSERT_DIE(birdloop_inside(&main_birdloop)); + pool *p = rp_newf(pp, "Routing table %s", cf->name); - rtable *t = ralloc(p, &rt_class); + struct rtable_private *t = ralloc(p, &rt_class); t->rp = p; t->rte_slab = sl_new(p, sizeof(struct rte_storage)); @@ -2341,6 +2784,11 @@ rt_setup(pool *pp, struct rtable_config *cf) t->name = cf->name; t->config = cf; t->addr_type = cf->addr_type; + t->id = idm_alloc(&rtable_idm); + if (t->id >= rtable_max_id) + rtable_max_id = t->id + 1; + + t->lock = DOMAIN_NEW(rtable, t->name); fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); @@ -2352,38 +2800,48 @@ rt_setup(pool *pp, struct rtable_config *cf) t->fib.init = net_init_with_trie; } - init_list(&t->flowspec_links); + init_list(&t->imports); - t->exporter = (struct rt_exporter) { - .addr_type = t->addr_type, - .start = rt_table_export_start, - .stop = rt_table_export_stop, - .done = rt_table_export_done, - }; - init_list(&t->exporter.hooks); + hmap_init(&t->id_map, p, 1024); + hmap_set(&t->id_map, 0); - if (!(t->internal = cf->internal)) - { - init_list(&t->imports); + t->fh = (struct birdloop_flag_handler) { .hook = rt_flag_handler, }; + t->nhu_uncork_event = ev_new_init(p, rt_nhu_uncork, t); + t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); + t->last_rt_change = t->gc_time = current_time(); - hmap_init(&t->id_map, p, 1024); - hmap_set(&t->id_map, 0); + t->export_settle = SETTLE_INIT(&cf->export_settle, rt_announce_exports, NULL); - init_list(&t->subscribers); + t->exporter = (struct rt_table_exporter) { + .e = { + 
.class = &rt_table_exporter_class, + .addr_type = t->addr_type, + .rp = t->rp, + }, + .next_seq = 1, + }; - t->rt_event = ev_new_init(p, rt_event, t); - t->last_rt_change = t->gc_time = current_time(); + rt_exporter_init(&t->exporter.e); - t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + init_list(&t->exporter.pending); - if (rt_is_flow(t)) - { - t->flowspec_trie = f_new_trie(lp_new_default(p), 0); - t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); - } + t->cork_threshold = cf->cork_threshold; + + t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + + if (rt_is_flow(RT_PUB(t))) + { + t->flowspec_trie = f_new_trie(lp_new_default(p), 0); + t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); } - return t; + /* Start the service thread */ + t->loop = birdloop_new(p, DOMAIN_ORDER(service), mb_sprintf(p, "Routing table %s", t->name)); + birdloop_enter(t->loop); + birdloop_flag_set_handler(t->loop, &t->fh); + birdloop_leave(t->loop); + + return RT_PUB(t); } /** @@ -2397,9 +2855,11 @@ rt_init(void) { rta_init(); rt_table_pool = rp_new(&root_pool, "Routing tables"); - rte_update_pool = lp_new_default(rt_table_pool); init_list(&routing_tables); init_list(&deleted_routing_tables); + ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release"); + rt_cork.run = (event) { .hook = rt_cork_release_hook }; + idm_init(&rtable_idm, rt_table_pool, 256); } @@ -2418,7 +2878,7 @@ rt_init(void) * iteration. */ static void -rt_prune_table(rtable *tab) +rt_prune_table(struct rtable_private *tab) { struct fib_iterator *fit = &tab->prune_fit; int limit = 2000; @@ -2426,7 +2886,7 @@ rt_prune_table(rtable *tab) struct rt_import_hook *ih; node *n, *x; - DBG("Pruning route table %s\n", tab->name); + rt_trace(tab, D_STATES, "Pruning"); #ifdef DEBUGGING fib_check(&tab->fib); #endif @@ -2440,10 +2900,20 @@ rt_prune_table(rtable *tab) WALK_LIST2(ih, n, tab->imports, n) if (ih->import_state == TIS_STOP) rt_set_import_state(ih, TIS_FLUSHING); + else if ((ih->stale_valid != ih->stale_pruning) && (ih->stale_pruning == ih->stale_pruned)) + { + ih->stale_pruning = ih->stale_valid; + + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh begin [%u]", ih->req->name, ih->stale_pruning); + } FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; + tab->gc_counter = 0; + tab->gc_time = current_time(); + if (tab->prune_trie) { /* Init prefix trie pruning */ @@ -2459,30 +2929,25 @@ again: if (limit <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); + birdloop_flag(tab->loop, RTF_CLEANUP); return; } for (struct rte_storage *e=n->routes; e; e=e->next) { - if ((e->rte.sender->import_state == TIS_FLUSHING) || (e->rte.flags & REF_DISCARD)) - { - rte_discard(n, &e->rte); - limit--; - - goto rescan; - } - - if (e->rte.flags & REF_MODIFY) + struct rt_import_hook *s = e->rte.sender; + if ((s->import_state == TIS_FLUSHING) || + (e->rte.stale_cycle < s->stale_valid) || + (e->rte.stale_cycle > s->stale_set)) { - rte_modify(n, &e->rte); + rte_recalculate(tab, e->rte.sender, n, NULL, e->rte.src); limit--; goto rescan; } } - if (!n->routes) /* Orphaned FIB entry */ + if (!n->routes && !n->first) /* Orphaned FIB entry */ { FIB_ITERATE_PUT(fit); fib_delete(&tab->fib, n); @@ -2497,15 +2962,16 @@ again: } FIB_ITERATE_END; + rt_trace(tab, D_EVENTS, "Prune done, scheduling export timer"); + rt_kick_export_settle(tab); + #ifdef DEBUGGING fib_check(&tab->fib); #endif - tab->gc_counter = 0; - tab->gc_time = current_time(); - /* state change 2->0, 3->1 */ - tab->prune_state &= 1; + if (tab->prune_state &= 1) 
+ birdloop_flag(tab->loop, RTF_CLEANUP); if (tab->trie_new) { @@ -2538,18 +3004,205 @@ again: } } - rt_prune_sources(); + uint flushed_channels = 0; /* Close flushed channels */ WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) if (ih->import_state == TIS_FLUSHING) { - rt_set_import_state(ih, TIS_CLEARED); - ih->stopped(ih->req); - rem_node(&ih->n); - mb_free(ih); - rt_unlock_table(tab); + ih->flush_seq = tab->exporter.next_seq; + rt_set_import_state(ih, TIS_WAITING); + flushed_channels++; + tab->rr_counter--; } + else if (ih->stale_pruning != ih->stale_pruned) + { + tab->rr_counter -= (ih->stale_pruned - ih->stale_pruning); + ih->stale_pruned = ih->stale_pruning; + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned); + } + + /* In some cases, we may want to directly proceed to export cleanup */ + if (EMPTY_LIST(tab->exporter.e.hooks) && flushed_channels) + rt_export_cleanup(tab); +} + +static void +rt_export_cleanup(struct rtable_private *tab) +{ + tab->export_used = 0; + + u64 min_seq = ~((u64) 0); + struct rt_pending_export *last_export_to_free = NULL; + struct rt_pending_export *first = tab->exporter.first; + int want_prune = 0; + + struct rt_table_export_hook *eh; + node *n; + WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n) + { + switch (atomic_load_explicit(&eh->h.export_state, memory_order_acquire)) + { + case TES_DOWN: + case TES_HUNGRY: + continue; + + case TES_READY: + { + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (!last) + /* No last export means that the channel has exported nothing since last cleanup */ + goto done; + + else if (min_seq > last->seq) + { + min_seq = last->seq; + last_export_to_free = last; + } + continue; + } + + default: + /* It's only safe to clean up when the export state is idle or regular. No feeding or stopping allowed. */ + goto done; + } + } + + tab->exporter.first = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL; + + rt_trace(tab, D_STATES, "Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld", + first ? first->seq : 0, + tab->exporter.first ? tab->exporter.first->seq : 0, + min_seq); + + WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n) + { + if (atomic_load_explicit(&eh->h.export_state, memory_order_acquire) != TES_READY) + continue; + + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (last == last_export_to_free) + { + /* This may fail when the channel managed to export more in between. This is OK. */ + atomic_compare_exchange_strong_explicit( + &eh->last_export, &last, NULL, + memory_order_release, + memory_order_relaxed); + + DBG("store hook=%p last_export=NULL\n", eh); + } + } + + while (first && (first->seq <= min_seq)) + { + ASSERT_DIE(first->new || first->old); + + const net_addr *n = first->new ? 
+ first->new->rte.net : + first->old->rte.net; + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + + ASSERT_DIE(net->first == first); + + if (first == net->last) + /* The only export here */ + net->last = net->first = NULL; + else + /* First is now the next one */ + net->first = atomic_load_explicit(&first->next, memory_order_relaxed); + + want_prune += !net->routes && !net->first; + + /* For now, the old route may be finally freed */ + if (first->old) + { + rt_rte_trace_in(D_ROUTES, first->old->rte.sender->req, &first->old->rte, "freed"); + hmap_clear(&tab->id_map, first->old->rte.id); + rte_free(first->old); + } + +#ifdef LOCAL_DEBUG + memset(first, 0xbd, sizeof(struct rt_pending_export)); +#endif + + struct rt_export_block *reb = HEAD(tab->exporter.pending); + ASSERT_DIE(reb == PAGE_HEAD(first)); + + u32 pos = (first - &reb->export[0]); + u32 end = atomic_load_explicit(&reb->end, memory_order_relaxed); + ASSERT_DIE(pos < end); + + struct rt_pending_export *next = NULL; + + if (++pos < end) + next = &reb->export[pos]; + else + { + rem_node(&reb->n); + +#ifdef LOCAL_DEBUG + memset(reb, 0xbe, page_size); +#endif + + free_page(reb); + + if (EMPTY_LIST(tab->exporter.pending)) + { + rt_trace(tab, D_EVENTS, "Resetting export seq"); + + node *n; + WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n) + { + if (atomic_load_explicit(&eh->h.export_state, memory_order_acquire) != TES_READY) + continue; + + ASSERT_DIE(atomic_load_explicit(&eh->last_export, memory_order_acquire) == NULL); + bmap_reset(&eh->h.seq_map, 1024); + } + + tab->exporter.next_seq = 1; + } + else + { + reb = HEAD(tab->exporter.pending); + next = &reb->export[0]; + } + } + + first = next; + } + + rt_check_cork_low(tab); + +done:; + struct rt_import_hook *ih; node *x; + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_WAITING) + if (!first || (first->seq >= ih->flush_seq)) + { + ih->import_state = TIS_CLEARED; + ev_send(ih->req->list, &ih->announce_event); + } + + if ((tab->gc_counter += want_prune) >= tab->config->gc_threshold) + rt_kick_prune_timer(tab); + + if (tab->export_used) + birdloop_flag(tab->loop, RTF_CLEANUP); + + if (EMPTY_LIST(tab->exporter.pending)) + settle_cancel(&tab->export_settle); +} + +static void +rt_cork_release_hook(void *data UNUSED) +{ + do synchronize_rcu(); + while ( + !atomic_load_explicit(&rt_cork.active, memory_order_acquire) && + ev_run_list(&rt_cork.queue) + ); } /** @@ -2566,7 +3219,7 @@ again: * */ struct f_trie * -rt_lock_trie(rtable *tab) +rt_lock_trie(struct rtable_private *tab) { ASSERT(tab->trie); @@ -2583,7 +3236,7 @@ rt_lock_trie(rtable *tab) * It may free the trie and schedule next trie pruning. 
*/ void -rt_unlock_trie(rtable *tab, struct f_trie *trie) +rt_unlock_trie(struct rtable_private *tab, struct f_trie *trie) { ASSERT(trie); @@ -2623,8 +3276,29 @@ rt_preconfig(struct config *c) { init_list(&c->tables); - rt_new_table(cf_get_symbol("master4"), NET_IP4); - rt_new_table(cf_get_symbol("master6"), NET_IP6); + c->def_tables[NET_IP4] = cf_define_symbol(cf_get_symbol("master4"), SYM_TABLE, table, NULL); + c->def_tables[NET_IP6] = cf_define_symbol(cf_get_symbol("master6"), SYM_TABLE, table, NULL); +} + +void +rt_postconfig(struct config *c) +{ + uint num_tables = list_length(&c->tables); + btime def_gc_period = 400 MS * num_tables; + def_gc_period = MAX(def_gc_period, 10 S); + def_gc_period = MIN(def_gc_period, 600 S); + + struct rtable_config *rc; + WALK_LIST(rc, c->tables) + if (rc->gc_period == (uint) -1) + rc->gc_period = (uint) def_gc_period; + + for (uint net_type = 0; net_type < NET_MAX; net_type++) + if (c->def_tables[net_type] && !c->def_tables[net_type]->table) + { + c->def_tables[net_type]->class = SYM_VOID; + c->def_tables[net_type] = NULL; + } } @@ -2634,7 +3308,7 @@ rt_preconfig(struct config *c) */ void -ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) +ea_set_hostentry(ea_list **to, rtable *dep, rtable *src, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) { struct { struct adata ad; @@ -2642,7 +3316,8 @@ ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr g u32 labels[lnum]; } *head = (void *) tmp_alloc_adata(sizeof *head - sizeof(struct adata)); - head->he = rt_get_hostentry(tab, gw, ll, dep); + RT_LOCKED(src, tab) + head->he = rt_get_hostentry(tab, gw, ll, dep); memcpy(head->labels, labels, lnum * sizeof(u32)); ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( @@ -2771,17 +3446,16 @@ rta_next_hop_outdated(ea_list *a) ? head : NULL; } -static inline struct rte_storage * -rt_next_hop_update_rte(rtable *tab, net *n, rte *old) +static inline int +rt_next_hop_update_rte(rte *old, rte *new) { struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); if (!head) - return NULL; - - rte e0 = *old; - rta_apply_hostentry(&e0.attrs, head); + return 0; - return rte_store(&e0, n, tab); + *new = *old; + rta_apply_hostentry(&new->attrs, head); + return 1; } static inline void @@ -2839,7 +3513,6 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list * { ASSERT(rt_is_ip(tab_ip)); ASSERT(rt_is_flow(tab_flow)); - ASSERT(tab_ip->trie); /* RFC 8955 6. a) Flowspec has defined dst prefix */ if (!net_flow_has_dst_prefix(n)) @@ -2859,32 +3532,45 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list * else net_fill_ip6(&dst, net6_prefix(n), net6_pxlen(n)); - /* Find best-match BGP unicast route for flowspec dst prefix */ - net *nb = net_route(tab_ip, &dst); - const rte *rb = nb ? &nb->routes->rte : NULL; + rte rb = {}; + net_addr_union nau; + RT_LOCKED(tab_ip, tip) + { + ASSERT(tip->trie); + /* Find best-match BGP unicast route for flowspec dst prefix */ + net *nb = net_route(tip, &dst); + if (nb) + { + rb = RTE_COPY_VALID(nb->routes); + rta_clone(rb.attrs); + net_copy(&nau.n, nb->n.addr); + rb.net = &nau.n; + } + } /* Register prefix to trie for tracking further changes */ int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; - trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? nb->n.addr->pxlen : 0), max_pxlen); + RT_LOCKED(tab_flow, tfl) + trie_add_prefix(tfl->flowspec_trie, &dst, (rb.net ? 
rb.net->pxlen : 0), max_pxlen); /* No best-match BGP route -> no flowspec */ - if (!rb || (rt_get_source_attr(rb) != RTS_BGP)) + if (!rb.attrs || (rt_get_source_attr(&rb) != RTS_BGP)) return FLOWSPEC_INVALID; /* Find ORIGINATOR_ID values */ u32 orig_a = ea_get_int(a, "bgp_originator_id", 0); - u32 orig_b = ea_get_int(rb->attrs, "bgp_originator_id", 0); + u32 orig_b = ea_get_int(rb.attrs, "bgp_originator_id", 0); /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */ if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal( ea_get_ip(a, &ea_gen_from, IPA_NONE), - ea_get_ip(rb->attrs, &ea_gen_from, IPA_NONE) + ea_get_ip(rb.attrs, &ea_gen_from, IPA_NONE) ))) return FLOWSPEC_INVALID; /* Find ASN of the best-match route, for use in next checks */ - u32 asn_b = rta_get_first_asn(rb->attrs); + u32 asn_b = rta_get_first_asn(rb.attrs); if (!asn_b) return FLOWSPEC_INVALID; @@ -2893,51 +3579,53 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list * return FLOWSPEC_INVALID; /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */ - TRIE_WALK(tab_ip->trie, subnet, &dst) + RT_LOCKED(tab_ip, tip) { - net *nc = net_find_valid(tab_ip, &subnet); - if (!nc) - continue; + TRIE_WALK(tip->trie, subnet, &dst) + { + net *nc = net_find_valid(tip, &subnet); + if (!nc) + continue; - const rte *rc = &nc->routes->rte; - if (rt_get_source_attr(rc) != RTS_BGP) - return FLOWSPEC_INVALID; + const rte *rc = &nc->routes->rte; + if (rt_get_source_attr(rc) != RTS_BGP) + RT_RETURN(tip, FLOWSPEC_INVALID); - if (rta_get_first_asn(rc->attrs) != asn_b) - return FLOWSPEC_INVALID; + if (rta_get_first_asn(rc->attrs) != asn_b) + RT_RETURN(tip, FLOWSPEC_INVALID); + } + TRIE_WALK_END; } - TRIE_WALK_END; return FLOWSPEC_VALID; } #endif /* CONFIG_BGP */ -static struct rte_storage * -rt_flowspec_update_rte(rtable *tab, net *n, rte *r) +static int +rt_flowspec_update_rte(rtable *tab, rte *r, rte *new) { #ifdef CONFIG_BGP if (r->generation || (rt_get_source_attr(r) != RTS_BGP)) - return NULL; + return 0; struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req); if (!bc->base_table) - return NULL; + return 0; struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); enum flowspec_valid old = rt_get_flowspec_valid(r), - valid = rt_flowspec_check(bc->base_table, tab, n->n.addr, r->attrs, p->is_interior); + valid = rt_flowspec_check(bc->base_table, tab, r->net, r->attrs, p->is_interior); if (old == valid) - return NULL; - - rte new = *r; - ea_set_attr_u32(&new.attrs, &ea_gen_flowspec_valid, 0, valid); + return 0; - return rte_store(&new, n, tab); + *new = *r; + ea_set_attr_u32(&new->attrs, &ea_gen_flowspec_valid, 0, valid); + return 1; #else - return NULL; + return 0; #endif } @@ -2972,10 +3660,9 @@ rt_flowspec_resolve_rte(rte *r, struct channel *c) } static inline int -rt_next_hop_update_net(rtable *tab, net *n) +rt_next_hop_update_net(struct rtable_private *tab, net *n) { - struct rte_storage *new; - int count = 0; + uint count = 0; int is_flow = net_is_flow(n->n.addr); struct rte_storage *old_best = n->routes; @@ -2983,44 +3670,90 @@ rt_next_hop_update_net(rtable *tab, net *n) return 0; for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) - if (is_flow || rta_next_hop_outdated(e->rte.attrs)) - count++; + count++; if (!count) return 0; struct rte_multiupdate { - struct rte_storage *old, *new; - } *updates = alloca(sizeof(struct rte_multiupdate) * count); + struct rte_storage *old, 
*new_stored; + rte new; + } *updates = tmp_allocz(sizeof(struct rte_multiupdate) * (count+1)); + + struct rt_pending_export *last_pending = n->last; - int pos = 0; + uint pos = 0; for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) - if (is_flow || rta_next_hop_outdated(e->rte.attrs)) - { - struct rte_storage *new = is_flow - ? rt_flowspec_update_rte(tab, n, &e->rte) - : rt_next_hop_update_rte(tab, n, &e->rte); + updates[pos++].old = e; + + /* This is an exceptional place where the table can be unlocked while keeping its data: + * the reason why this is safe is that NHU must always be run from the same + * thread as cleanup routines, therefore the only real problem may arise when + * some importer does a change on this particular net (destination) while NHU + * is being computed. Statistically, this should almost never happen. In such + * case, we just drop all the computed changes and do it once again. + * */ + RT_UNLOCK(tab); + + uint mod = 0; + if (is_flow) + for (uint i = 0; i < pos; i++) + mod += rt_flowspec_update_rte(RT_PUB(tab), &updates[i].old->rte, &updates[i].new); - if (!new) - continue; + else + for (uint i = 0; i < pos; i++) + mod += rt_next_hop_update_rte(&updates[i].old->rte, &updates[i].new); - /* Call a pre-comparison hook */ - /* Not really an efficient way to compute this */ - if (e->rte.src->proto->rte_recalculate) - e->rte.src->proto->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); + RT_LOCK(RT_PUB(tab)); - updates[pos++] = (struct rte_multiupdate) { - .old = e, - .new = new, - }; + if (!mod) + return 0; + + /* Something has changed in between, retry NHU. */ + if (last_pending != n->last) + return rt_next_hop_update_net(tab, n); + + /* Now we reconstruct the original linked list */ + struct rte_storage **nptr = &n->routes; + for (uint i = 0; i < pos; i++) + { + updates[i].old->next = NULL; - /* Replace the route in the list */ - new->next = e->next; - *k = e = new; + struct rte_storage *put; + if (updates[i].new.attrs) + put = updates[i].new_stored = rte_store(&updates[i].new, n, tab); + else + put = updates[i].old; + + *nptr = put; + nptr = &put->next; + } + *nptr = NULL; + + /* Call the pre-comparison hooks */ + for (uint i = 0; i < pos; i++) + if (updates[i].new_stored) + { + /* Get a new ID for the route */ + updates[i].new_stored->rte.lastmod = current_time(); + updates[i].new_stored->rte.id = hmap_first_zero(&tab->id_map); + hmap_set(&tab->id_map, updates[i].new_stored->rte.id); + + /* Call a pre-comparison hook */ + /* Not really an efficient way to compute this */ + if (updates[i].old->rte.src->owner->rte_recalculate) + updates[i].old->rte.src->owner->rte_recalculate(tab, n, &updates[i].new_stored->rte, &updates[i].old->rte, &old_best->rte); } - ASSERT_DIE(pos <= count); - count = pos; +#if DEBUGGING + { + uint t = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + t++; + ASSERT_DIE(t == pos); + ASSERT_DIE(pos == count); + } +#endif /* Find the new best route */ struct rte_storage **new_best = NULL; @@ -3031,7 +3764,7 @@ rt_next_hop_update_net(rtable *tab, net *n) } /* Relink the new best route to the first position */ - new = *new_best; + struct rte_storage *new = *new_best; if (new != n->routes) { *new_best = new->next; @@ -3039,86 +3772,166 @@ rt_next_hop_update_net(rtable *tab, net *n) n->routes = new; } + uint total = 0; /* Announce the changes */ - for (int i=0; i<count; i++) + for (uint i=0; i<count; i++) { - _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old); - const char *best_indicator[2][2] = { 
{ "updated", "updated [-best]" }, { "updated [+best]", "updated [best]" } }; - rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); - rte_announce_i(tab, n, updates[i].new, updates[i].old, new, old_best); + if (!updates[i].new_stored) + continue; + + _Bool nb = (new->rte.src == updates[i].new.src), ob = (i == 0); + const char *best_indicator[2][2] = { + { "autoupdated", "autoupdated [-best]" }, + { "autoupdated [+best]", "autoupdated [best]" } + }; + rt_rte_trace_in(D_ROUTES, updates[i].new.sender->req, &updates[i].new, best_indicator[nb][ob]); + rte_announce(tab, n, updates[i].new_stored, updates[i].old, new, old_best); + + total++; } - for (int i=0; i<count; i++) - rte_free(updates[i].old); + return total; +} - return count; +static void +rt_nhu_uncork(void *_tab) +{ + RT_LOCKED((rtable *) _tab, tab) + { + ASSERT_DIE(tab->nhu_corked); + ASSERT_DIE(tab->nhu_state == 0); + + /* Reset the state */ + tab->nhu_state = tab->nhu_corked; + tab->nhu_corked = 0; + rt_trace(tab, D_STATES, "Next hop updater uncorked"); + + birdloop_flag(tab->loop, RTF_NHU); + } } static void -rt_next_hop_update(rtable *tab) +rt_next_hop_update(struct rtable_private *tab) { - struct fib_iterator *fit = &tab->nhu_fit; - int max_feed = 32; + ASSERT_DIE(birdloop_inside(tab->loop)); + + if (tab->nhu_corked) + return; + + if (!tab->nhu_state) + return; - if (tab->nhu_state == NHU_CLEAN) + /* Check corkedness */ + if (rt_cork_check(tab->nhu_uncork_event)) + { + rt_trace(tab, D_STATES, "Next hop updater corked"); + if ((tab->nhu_state & NHU_RUNNING) + && !EMPTY_LIST(tab->exporter.pending)) + rt_kick_export_settle(tab); + + tab->nhu_corked = tab->nhu_state; + tab->nhu_state = 0; return; + } + struct fib_iterator *fit = &tab->nhu_fit; + int max_feed = 32; + + /* Initialize a new run */ if (tab->nhu_state == NHU_SCHEDULED) - { - FIB_ITERATE_INIT(fit, &tab->fib); - tab->nhu_state = NHU_RUNNING; + { + FIB_ITERATE_INIT(fit, &tab->fib); + tab->nhu_state = NHU_RUNNING; - if (tab->flowspec_trie) - rt_flowspec_reset_trie(tab); - } + if (tab->flowspec_trie) + rt_flowspec_reset_trie(tab); + } + /* Walk the fib one net after another */ FIB_ITERATE_START(&tab->fib, fit, net, n) { if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); + birdloop_flag(tab->loop, RTF_NHU); return; } + lp_state lps; + lp_save(tmp_linpool, &lps); max_feed -= rt_next_hop_update_net(tab, n); + lp_restore(tmp_linpool, &lps); } FIB_ITERATE_END; + /* Finished NHU, cleanup */ + rt_trace(tab, D_EVENTS, "NHU done, scheduling export timer"); + rt_kick_export_settle(tab); + /* State change: * NHU_DIRTY -> NHU_SCHEDULED * NHU_RUNNING -> NHU_CLEAN */ - tab->nhu_state &= 1; + if ((tab->nhu_state &= NHU_SCHEDULED) == NHU_SCHEDULED) + birdloop_flag(tab->loop, RTF_NHU); +} + +void +rt_new_default_table(struct symbol *s) +{ + for (uint addr_type = 0; addr_type < NET_MAX; addr_type++) + if (s == new_config->def_tables[addr_type]) + { + s->table = rt_new_table(s, addr_type); + return; + } - if (tab->nhu_state != NHU_CLEAN) - ev_schedule(tab->rt_event); + bug("Requested an unknown new default table: %s", s->name); } +struct rtable_config * +rt_get_default_table(struct config *cf, uint addr_type) +{ + struct symbol *ts = cf->def_tables[addr_type]; + if (!ts) + return NULL; + + if (!ts->table) + rt_new_default_table(ts); + + return ts->table; +} struct rtable_config * rt_new_table(struct symbol *s, uint addr_type) { - /* Hack that allows to 'redefine' the master table */ - if ((s->class == SYM_TABLE) && - 
(s->table == new_config->def_tables[addr_type]) && - ((addr_type == NET_IP4) || (addr_type == NET_IP6))) - return s->table; - struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config)); - cf_define_symbol(s, SYM_TABLE, table, c); + if (s == new_config->def_tables[addr_type]) + s->table = c; + else + cf_define_symbol(s, SYM_TABLE, table, c); + c->name = s->name; c->addr_type = addr_type; - c->gc_max_ops = 1000; - c->gc_min_time = 5; - c->min_settle_time = 1 S; - c->max_settle_time = 20 S; + c->gc_threshold = 1000; + c->gc_period = (uint) -1; /* set in rt_postconfig() */ + c->cork_threshold.low = 128; + c->cork_threshold.high = 512; + c->export_settle = (struct settle_config) { + .min = 1 MS, + .max = 100 MS, + }; + c->export_rr_settle = (struct settle_config) { + .min = 100 MS, + .max = 3 S, + }; + c->debug = new_config->table_debug; add_tail(&new_config->tables, &c->n); /* First table of each type is kept as default */ if (! new_config->def_tables[addr_type]) - new_config->def_tables[addr_type] = c; + new_config->def_tables[addr_type] = s; return c; } @@ -3132,8 +3945,9 @@ rt_new_table(struct symbol *s, uint addr_type) * configuration. */ void -rt_lock_table(rtable *r) +rt_lock_table_priv(struct rtable_private *r, const char *file, uint line) { + rt_trace(r, D_STATES, "Locked at %s:%d", file, line); r->use_count++; } @@ -3146,20 +3960,71 @@ rt_lock_table(rtable *r) * for deletion by configuration changes. */ void -rt_unlock_table(rtable *r) +rt_unlock_table_priv(struct rtable_private *r, const char *file, uint line) { + rt_trace(r, D_STATES, "Unlocked at %s:%d", file, line); if (!--r->use_count && r->deleted) - { - struct config *conf = r->deleted; + /* Stop the service thread to finish this up */ + ev_send(&global_event_list, ev_new_init(r->rp, rt_shutdown, r)); +} - /* Delete the routing table by freeing its pool */ - rt_shutdown(r); - config_del_obstacle(conf); - } +static void +rt_shutdown(void *tab_) +{ + struct rtable_private *r = tab_; + birdloop_stop(r->loop, rt_delete, r); } +static void +rt_delete(void *tab_) +{ + birdloop_enter(&main_birdloop); + + /* We assume that nobody holds the table reference now as use_count is zero. + * Anyway the last holder may still hold the lock. Therefore we lock and + * unlock it the last time to be sure that nobody is there. 
*/ + struct rtable_private *tab = RT_LOCK((rtable *) tab_); + struct config *conf = tab->deleted; + + RT_UNLOCK(RT_PUB(tab)); + + rfree(tab->rp); + config_del_obstacle(conf); + + birdloop_leave(&main_birdloop); +} + + +static void +rt_check_cork_low(struct rtable_private *tab) +{ + if (!tab->cork_active) + return; + + if (!tab->exporter.first || (tab->exporter.first->seq + tab->cork_threshold.low > tab->exporter.next_seq)) + { + tab->cork_active = 0; + rt_cork_release(); + + rt_trace(tab, D_STATES, "Uncorked"); + } +} + +static void +rt_check_cork_high(struct rtable_private *tab) +{ + if (!tab->cork_active && tab->exporter.first && (tab->exporter.first->seq + tab->cork_threshold.high <= tab->exporter.next_seq)) + { + tab->cork_active = 1; + rt_cork_acquire(); + + rt_trace(tab, D_STATES, "Corked"); + } +} + + static int -rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) +rt_reconfigure(struct rtable_private *tab, struct rtable_config *new, struct rtable_config *old) { if ((new->addr_type != old->addr_type) || (new->sorted != old->sorted) || @@ -3167,10 +4032,26 @@ rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old return 0; DBG("\t%s: same\n", new->name); - new->table = tab; + new->table = RT_PUB(tab); tab->name = new->name; tab->config = new; + if (tab->hostcache) + tab->hostcache->req.trace_routes = new->debug; + + struct rt_table_export_hook *hook; node *n; + WALK_LIST2(hook, n, tab->exporter.e.hooks, h.n) + if (hook->h.req->export_one == rt_flowspec_export_one) + hook->h.req->trace_routes = new->debug; + + tab->cork_threshold = new->cork_threshold; + + if (new->cork_threshold.high != old->cork_threshold.high) + rt_check_cork_high(tab); + + if (new->cork_threshold.low != old->cork_threshold.low) + rt_check_cork_low(tab); + return 1; } @@ -3203,19 +4084,36 @@ rt_commit(struct config *new, struct config *old) { WALK_LIST(o, old->tables) { - rtable *tab = o->table; + struct rtable_private *tab = RT_LOCK(o->table); + if (tab->deleted) + { + RT_UNLOCK(tab); continue; + } r = rt_find_table_config(new, o->name); if (r && !new->shutdown && rt_reconfigure(tab, r, o)) + { + RT_UNLOCK(tab); continue; + } DBG("\t%s: deleted\n", o->name); tab->deleted = old; config_add_obstacle(old); rt_lock_table(tab); + + if (tab->hostcache) + { + rt_stop_export(&tab->hostcache->req, NULL); + if (ev_get_list(&tab->hostcache->update) == &rt_cork.queue) + ev_postpone(&tab->hostcache->update); + } + rt_unlock_table(tab); + + RT_UNLOCK(tab); } } @@ -3229,6 +4127,84 @@ rt_commit(struct config *new, struct config *old) DBG("\tdone\n"); } +static void +rt_feed_done(struct rt_export_hook *c) +{ + c->event.hook = rt_export_hook; + + rt_set_export_state(c, TES_READY); + + rt_send_export_event(c); +} + +#define MAX_FEED_BLOCK 1024 +typedef struct { + uint cnt, pos; + union { + struct rt_pending_export *rpe; + struct { + rte **feed; + uint *start; + }; + }; +} rt_feed_block; + +static int +rt_prepare_feed(struct rt_table_export_hook *c, net *n, rt_feed_block *b) +{ + if (n->routes) + { + if (c->h.req->export_bulk) + { + uint cnt = rte_feed_count(n); + if (b->cnt && (b->cnt + cnt > MAX_FEED_BLOCK)) + return 0; + + if (!b->cnt) + { + b->feed = tmp_alloc(sizeof(rte *) * MAX(MAX_FEED_BLOCK, cnt)); + b->start = tmp_alloc(sizeof(uint) * ((cnt >= MAX_FEED_BLOCK) ? 
2 : (MAX_FEED_BLOCK + 2 - cnt))); + } + + rte_feed_obtain(n, &b->feed[b->cnt], cnt); + b->start[b->pos++] = b->cnt; + b->cnt += cnt; + } + else if (b->pos == MAX_FEED_BLOCK) + return 0; + else + { + if (!b->pos) + b->rpe = tmp_alloc(sizeof(struct rt_pending_export) * MAX_FEED_BLOCK); + + b->rpe[b->pos++] = (struct rt_pending_export) { .new = n->routes, .new_best = n->routes }; + } + } + + rpe_mark_seen_all(&c->h, n->first, NULL); + return 1; +} + +static void +rt_process_feed(struct rt_table_export_hook *c, rt_feed_block *b) +{ + if (!b->pos) + return; + + if (c->h.req->export_bulk) + { + b->start[b->pos] = b->cnt; + for (uint p = 0; p < b->pos; p++) + { + rte **feed = &b->feed[b->start[p]]; + c->h.req->export_bulk(c->h.req, feed[0]->net, NULL, feed, b->start[p+1] - b->start[p]); + } + } + else + for (uint p = 0; p < b->pos; p++) + c->h.req->export_one(c->h.req, b->rpe[p].new->rte.net, &b->rpe[p]); +} + /** * rt_feed_by_fib - advertise all routes to a channel by walking a fib * @c: channel to be fed @@ -3241,59 +4217,67 @@ rt_commit(struct config *new, struct config *old) static void rt_feed_by_fib(void *data) { - struct rt_export_hook *c = data; - + struct rt_table_export_hook *c = data; struct fib_iterator *fit = &c->feed_fit; - int max_feed = 256; + rt_feed_block block = {}; - ASSERT(c->export_state == TES_FEEDING); + ASSERT(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); - rtable *tab = SKIP_BACK(rtable, exporter, c->table); + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) + { FIB_ITERATE_START(&tab->fib, fit, net, n) { - if (max_feed <= 0) + if ((c->h.req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->h.req->addr)) + { + if (!rt_prepare_feed(c, n, &block)) { FIB_ITERATE_PUT(fit); - ev_schedule_work(c->event); + RT_UNLOCK(tab); + rt_process_feed(c, &block); + rt_send_export_event(&c->h); return; } - - ASSERT(c->export_state == TES_FEEDING); - - if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr)) - max_feed -= rt_feed_net(c, n); + } } FIB_ITERATE_END; + } - rt_set_export_state(c, TES_READY); + rt_process_feed(c, &block); + rt_feed_done(&c->h); } static void rt_feed_by_trie(void *data) { - struct rt_export_hook *c = data; - rtable *tab = SKIP_BACK(rtable, exporter, c->table); + struct rt_table_export_hook *c = data; + rt_feed_block block = {}; + + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) + { ASSERT_DIE(c->walk_state); struct f_trie_walk_state *ws = c->walk_state; - int max_feed = 256; + ASSERT(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); - ASSERT_DIE(c->export_state == TES_FEEDING); + do { + if (!c->walk_last.type) + continue; - net_addr addr; - while (trie_walk_next(ws, &addr)) - { - net *n = net_find(tab, &addr); + net *n = net_find(tab, &c->walk_last); if (!n) continue; - if ((max_feed -= rt_feed_net(c, n)) <= 0) + if (!rt_prepare_feed(c, n, &block)) + { + RT_UNLOCK(tab); + rt_process_feed(c, &block); + rt_send_export_event(&c->h); return; - - ASSERT_DIE(c->export_state == TES_FEEDING); + } } + while (trie_walk_next(ws, &c->walk_last)); rt_unlock_trie(tab, c->walk_lock); c->walk_lock = NULL; @@ -3301,69 +4285,56 @@ rt_feed_by_trie(void *data) mb_free(c->walk_state); c->walk_state = NULL; - rt_set_export_state(c, TES_READY); + c->walk_last.type = 0; + + } + + rt_process_feed(c, &block); + rt_feed_done(&c->h); } static void rt_feed_equal(void *data) { - struct rt_export_hook *c = data; - rtable *tab = 
SKIP_BACK(rtable, exporter, c->table); + struct rt_table_export_hook *c = data; + rt_feed_block block = {}; + net *n; - ASSERT_DIE(c->export_state == TES_FEEDING); - ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL); + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) + { + ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->h.req->addr_mode == TE_ADDR_EQUAL); + + if (n = net_find(tab, c->h.req->addr)) + ASSERT_DIE(rt_prepare_feed(c, n, &block)); + } - net *n = net_find(tab, c->req->addr); if (n) - rt_feed_net(c, n); + rt_process_feed(c, &block); - rt_set_export_state(c, TES_READY); + rt_feed_done(&c->h); } static void rt_feed_for(void *data) { - struct rt_export_hook *c = data; - rtable *tab = SKIP_BACK(rtable, exporter, c->table); + struct rt_table_export_hook *c = data; + rt_feed_block block = {}; + net *n; - ASSERT_DIE(c->export_state == TES_FEEDING); - ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR); - - net *n = net_route(tab, c->req->addr); - if (n) - rt_feed_net(c, n); - - rt_set_export_state(c, TES_READY); -} + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) + { + ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->h.req->addr_mode == TE_ADDR_FOR); -static uint -rt_feed_net(struct rt_export_hook *c, net *n) -{ - if (c->req->export_bulk) - { - uint count = rte_feed_count(n); - if (count) - { - rte_update_lock(); - rte **feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(n, feed, count); - struct rt_pending_export rpe = { .new_best = n->routes }; - c->req->export_bulk(c->req, n->n.addr, &rpe, feed, count); - rte_update_unlock(); - } - return count; - } + if (n = net_route(tab, c->h.req->addr)) + ASSERT_DIE(rt_prepare_feed(c, n, &block)); + } - if (n->routes && rte_is_valid(&n->routes->rte)) - { - rte_update_lock(); - struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; - c->req->export_one(c->req, n->n.addr, &rpe); - rte_update_unlock(); - return 1; - } + if (n) + rt_process_feed(c, &block); - return 0; + rt_feed_done(&c->h); } @@ -3371,7 +4342,6 @@ rt_feed_net(struct rt_export_hook *c, net *n) * Import table */ - void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { struct channel *c = SKIP_BACK(struct channel, reload_req, req); @@ -3493,7 +4463,56 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) } static void -rt_init_hostcache(rtable *tab) +hc_notify_dump_req(struct rt_export_request *req) +{ + debug(" Table %s (%p)\n", req->name, req); +} + +static void +hc_notify_log_state_change(struct rt_export_request *req, u8 state) +{ + struct hostcache *hc = SKIP_BACK(struct hostcache, req, req); + rt_trace((rtable *) hc->update.data, D_STATES, "HCU Export state changed to %s", rt_export_state_name(state)); +} + +static void +hc_notify_export_one(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first) +{ + struct hostcache *hc = SKIP_BACK(struct hostcache, req, req); + + /* No interest in this update, mark seen only */ + int interested = 1; + RT_LOCKED((rtable *) hc->update.data, tab) + if (ev_active(&hc->update) || !trie_match_net(hc->trie, net)) + { + rpe_mark_seen_all(req->hook, first, NULL); + interested = 0; + } + + if (!interested) + return; + + /* This net may affect some hostentries, check the actual change */ + rte *o = 
RTE_VALID_OR_NULL(first->old_best); + struct rte_storage *new_best = first->new_best; + + RPE_WALK(first, rpe, NULL) + { + rpe_mark_seen(req->hook, rpe); + new_best = rpe->new_best; + } + + /* Yes, something has actually changed. Do the hostcache update. */ + if (o != RTE_VALID_OR_NULL(new_best)) + RT_LOCKED((rtable *) hc->update.data, tab) + if ((atomic_load_explicit(&req->hook->export_state, memory_order_acquire) == TES_READY) + && !ev_active(&hc->update)) + ev_send_loop(tab->loop, &hc->update); +} + + +static void +rt_init_hostcache(struct rtable_private *tab) { struct hostcache *hc = mb_allocz(tab->rp, sizeof(struct hostcache)); init_list(&hc->hostentries); @@ -3505,11 +4524,27 @@ rt_init_hostcache(rtable *tab) hc->lp = lp_new(tab->rp); hc->trie = f_new_trie(hc->lp, 0); + hc->update = (event) { + .hook = rt_update_hostcache, + .data = tab, + }; + + hc->req = (struct rt_export_request) { + .name = mb_sprintf(tab->rp, "%s.hcu.notifier", tab->name), + .list = &global_work_list, + .trace_routes = tab->config->debug, + .dump_req = hc_notify_dump_req, + .log_state_change = hc_notify_log_state_change, + .export_one = hc_notify_export_one, + }; + + rt_table_export_start_locked(tab, &hc->req); + tab->hostcache = hc; } static void -rt_free_hostcache(rtable *tab) +rt_free_hostcache(struct rtable_private *tab) { struct hostcache *hc = tab->hostcache; @@ -3531,16 +4566,6 @@ rt_free_hostcache(rtable *tab) */ } -static void -rt_notify_hostcache(rtable *tab, net *net) -{ - if (tab->hcu_scheduled) - return; - - if (trie_match_net(tab->hostcache->trie, net->n.addr)) - rt_schedule_hcu(tab); -} - static int if_local_addr(ip_addr a, struct iface *i) { @@ -3564,14 +4589,14 @@ rt_get_igp_metric(const rte *rt) if (rt_get_source_attr(rt) == RTS_DEVICE) return 0; - if (rt->src->proto->rte_igp_metric) - return rt->src->proto->rte_igp_metric(rt); + if (rt->src->owner->class->rte_igp_metric) + return rt->src->owner->class->rte_igp_metric(rt); return IGP_METRIC_UNKNOWN; } static int -rt_update_hostentry(rtable *tab, struct hostentry *he) +rt_update_hostentry(struct rtable_private *tab, struct hostentry *he) { ea_list *old_src = he->src; int direct = 0; @@ -3589,9 +4614,12 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) { struct rte_storage *e = n->routes; ea_list *a = e->rte.attrs; - pxlen = n->n.addr->pxlen; + u32 pref = rt_get_preference(&e->rte); - if (ea_find(a, &ea_gen_hostentry)) + for (struct rte_storage *ee = n->routes; ee; ee = ee->next) + if (rte_is_valid(&ee->rte) && + (rt_get_preference(&ee->rte) >= pref) && + ea_find(ee->rte.attrs, &ea_gen_hostentry)) { /* Recursive route should not depend on another recursive route */ log(L_WARN "Next hop address %I resolvable through recursive route for %N", @@ -3599,6 +4627,8 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) goto done; } + pxlen = n->n.addr->pxlen; + eattr *nhea = ea_find(a, &ea_gen_nexthop); ASSERT_DIE(nhea); struct nexthop_adata *nhad = (void *) nhea->u.ptr; @@ -3632,9 +4662,28 @@ done: } static void -rt_update_hostcache(rtable *tab) +rt_update_hostcache(void *data) { + rtable **nhu_pending; + + RT_LOCKED((rtable *) data, tab) + { + struct hostcache *hc = tab->hostcache; + + /* Shutdown shortcut */ + if (!hc->req.hook) + RT_RETURN(tab); + + if (rt_cork_check(&hc->update)) + { + rt_trace(tab, D_STATES, "Hostcache update corked"); + RT_RETURN(tab); + } + + /* Destination schedule map */ + nhu_pending = tmp_allocz(sizeof(rtable *) * rtable_max_id); + struct hostentry *he; node *n, *x; @@ -3652,14 +4701,18 @@ 
rt_update_hostcache(rtable *tab) } if (rt_update_hostentry(tab, he)) - rt_schedule_nhu(he->tab); + nhu_pending[he->tab->id] = he->tab; } + } - tab->hcu_scheduled = 0; + for (uint i=0; i<rtable_max_id; i++) + if (nhu_pending[i]) + RT_LOCKED(nhu_pending[i], dst) + rt_schedule_nhu(dst); } static struct hostentry * -rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) +rt_get_hostentry(struct rtable_private *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he;
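
Both auxiliary consumers in this patch, rt_flowspec_export_one() and hc_notify_export_one(), follow the same discipline when handling an export: mark every pending export on the net as seen (so rt_export_cleanup() may free the journal blocks), track the newest best route across the chain, and act only if the valid best route actually changed. A condensed sketch of that pattern, assuming only the helpers visible in this diff (RPE_WALK, rpe_mark_seen, RTE_VALID_OR_NULL); the scheduled follow-up is a hypothetical stand-in:

/* Condensed export-consumer pattern from this patch */
static void
example_export_one(struct rt_export_request *req, const net_addr *net UNUSED,
		   struct rt_pending_export *first)
{
  /* Old valid best route before this batch of changes (may be NULL) */
  rte *old_valid = RTE_VALID_OR_NULL(first->old_best);
  struct rte_storage *new_best = first->new_best;

  /* Consume the whole pending chain; every item must be marked seen,
   * otherwise the export journal cannot be cleaned up. */
  RPE_WALK(first, rpe, NULL)
  {
    rpe_mark_seen(req->hook, rpe);
    new_best = rpe->new_best;
  }

  /* React only to a real change of the valid best route */
  if (old_valid != RTE_VALID_OR_NULL(new_best))
    example_schedule_recomputation(req);  /* hypothetical follow-up */
}

Note that even the "not interested" paths in this patch (trie mismatch, recomputation already scheduled) still call rpe_mark_seen_all(); skipping that would stall rt_export_cleanup() and eventually drive the journal past the cork thresholds.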