Diffstat (limited to 'nest/rt-table.c')
-rw-r--r--  nest/rt-table.c | 3163
1 file changed, 1686 insertions, 1477 deletions
diff --git a/nest/rt-table.c b/nest/rt-table.c index 390b3277..ada54396 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -45,28 +45,84 @@ #include "lib/string.h" #include "lib/alloca.h" -#ifdef CONFIG_BGP -#include "proto/bgp/bgp.h" -#endif +#include <stdatomic.h> pool *rt_table_pool; -static slab *rte_slab; -static linpool *rte_update_pool; - list routing_tables; -static void rt_free_hostcache(rtable *tab); -static void rt_notify_hostcache(rtable *tab, net *net); -static void rt_update_hostcache(rtable *tab); -static void rt_next_hop_update(rtable *tab); -static inline void rt_prune_table(rtable *tab); -static inline void rt_schedule_notify(rtable *tab); +/* Data structures for export journal */ +#define RT_PENDING_EXPORT_ITEMS (page_size - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export) + +struct rt_export_block { + node n; + _Atomic u32 end; + _Atomic _Bool not_last; + struct rt_pending_export export[]; +}; + +static void rt_free_hostcache(rtable_private *tab); +static void rt_notify_hostcache(rtable_private *tab, net *net); +static void rt_update_hostcache(void *tab); +static void rt_next_hop_update(void *tab); +static inline void rt_prune_table(void *tab); +static inline void rt_schedule_notify(rtable_private *tab); +static void rt_feed_channel(void *); + +static inline void rt_export_used(rtable_private *tab); +static void rt_export_cleanup(void *tab); + +const char *rt_import_state_name_array[TIS_MAX] = { + [TIS_DOWN] = "DOWN", + [TIS_UP] = "UP", + [TIS_STOP] = "STOP", + [TIS_FLUSHING] = "FLUSHING", + [TIS_WAITING] = "WAITING", + [TIS_CLEARED] = "CLEARED", +}; + +const char *rt_export_state_name_array[TES_MAX] = { + [TES_DOWN] = "DOWN", + [TES_HUNGRY] = "HUNGRY", + [TES_FEEDING] = "FEEDING", + [TES_READY] = "READY", + [TES_STOP] = "STOP" +}; + +const char *rt_import_state_name(u8 state) +{ + if (state >= TIS_MAX) + return "!! INVALID !!"; + else + return rt_import_state_name_array[state]; +} + +const char *rt_export_state_name(u8 state) +{ + if (state >= TES_MAX) + return "!! 
INVALID !!"; + else + return rt_export_state_name_array[state]; +} + +struct event_cork rt_cork; + +static inline void +rte_update_lock(struct channel *c) +{ + c->rte_update_nest_cnt++; +} +static inline void +rte_update_unlock(struct channel *c) +{ + if (!--c->rte_update_nest_cnt) + lp_flush(c->rte_update_pool); +} /* Like fib_route(), but skips empty net entries */ static inline void * -net_route_ip4(rtable *t, net_addr_ip4 *n) +net_route_ip4(rtable_private *t, net_addr_ip4 *n) { net *r; @@ -80,7 +136,7 @@ net_route_ip4(rtable *t, net_addr_ip4 *n) } static inline void * -net_route_ip6(rtable *t, net_addr_ip6 *n) +net_route_ip6(rtable_private *t, net_addr_ip6 *n) { net *r; @@ -94,7 +150,7 @@ net_route_ip6(rtable *t, net_addr_ip6 *n) } static inline void * -net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) +net_route_ip6_sadr(rtable_private *t, net_addr_ip6_sadr *n) { struct fib_node *fn; @@ -133,7 +189,7 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) } void * -net_route(rtable *tab, const net_addr *n) +net_route(rtable_private *tab, const net_addr *n) { ASSERT(tab->addr_type == n->type); @@ -162,12 +218,15 @@ net_route(rtable *tab, const net_addr *n) static int -net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) +net_roa_check_ip4(rtable *t, const net_addr_ip4 *px, u32 asn) { struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; int anything = 0; + RT_LOCK(t); + rtable_private *tab = RT_PRIV(t); + while (1) { for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next) @@ -175,11 +234,14 @@ net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) net_addr_roa4 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa4(roa, &n) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + { + RT_UNLOCK(tab); return ROA_VALID; + } } } @@ -190,16 +252,20 @@ net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) ip4_clrbit(&n.prefix, n.pxlen); } + RT_UNLOCK(tab); return anything ? ROA_INVALID : ROA_UNKNOWN; } static int -net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) +net_roa_check_ip6(rtable *t, const net_addr_ip6 *px, u32 asn) { struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; int anything = 0; + RT_LOCK(t); + rtable_private *tab = RT_PRIV(t); + while (1) { for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next) @@ -207,11 +273,14 @@ net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) net_addr_roa6 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa6(roa, &n) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + { + RT_UNLOCK(tab); return ROA_VALID; + } } } @@ -222,6 +291,7 @@ net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) ip6_clrbit(&n.prefix, n.pxlen); } + RT_UNLOCK(tab); return anything ? ROA_INVALID : ROA_UNKNOWN; } @@ -256,253 +326,55 @@ net_roa_check(rtable *tab, const net_addr *n, u32 asn) * @net: network node * @src: route source * - * The rte_find() function returns a route for destination @net - * which is from route source @src. + * The rte_find() function returns a pointer to a route for destination @net + * which is from route source @src. 
List end pointer is returned if no route is found. */ -rte * +static struct rte_storage ** rte_find(net *net, struct rte_src *src) { - rte *e = net->routes; - - while (e && e->attrs->src != src) - e = e->next; - return e; -} + struct rte_storage **e = &net->routes; -/** - * rte_get_temp - get a temporary &rte - * @a: attributes to assign to the new route (a &rta; in case it's - * un-cached, rte_update() will create a cached copy automatically) - * - * Create a temporary &rte and bind it with the attributes @a. - * Also set route preference to the default preference set for - * the protocol. - */ -rte * -rte_get_temp(rta *a) -{ - rte *e = sl_alloc(rte_slab); + while ((*e) && (*e)->rte.src != src) + e = &(*e)->next; - e->attrs = a; - e->id = 0; - e->flags = 0; - e->pref = 0; return e; } -rte * -rte_do_cow(rte *r) -{ - rte *e = sl_alloc(rte_slab); - - memcpy(e, r, sizeof(rte)); - e->attrs = rta_clone(r->attrs); - e->flags = 0; - return e; -} -/** - * rte_cow_rta - get a private writable copy of &rte with writable &rta - * @r: a route entry to be copied - * @lp: a linpool from which to allocate &rta - * - * rte_cow_rta() takes a &rte and prepares it and associated &rta for - * modification. There are three possibilities: First, both &rte and &rta are - * private copies, in that case they are returned unchanged. Second, &rte is - * private copy, but &rta is cached, in that case &rta is duplicated using - * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case - * both structures are duplicated by rte_do_cow() and rta_do_cow(). - * - * Note that in the second case, cached &rta loses one reference, while private - * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs, - * nexthops, ...) with it. To work properly, original shared &rta should have - * another reference during the life of created private copy. - * - * Result: a pointer to the new writable &rte with writable &rta. - */ -rte * -rte_cow_rta(rte *r, linpool *lp) +struct rte_storage * +rte_store(const rte *r, net *net, rtable_private *tab) { - if (!rta_is_cached(r->attrs)) - return r; - - r = rte_cow(r); - rta *a = rta_do_cow(r->attrs, lp); - rta_free(r->attrs); - r->attrs = a; - return r; -} + struct rte_storage *e = sl_alloc(tab->rte_slab); + e->rte = *r; + e->rte.net = net->n.addr; -/** - * rte_init_tmp_attrs - initialize temporary ea_list for route - * @r: route entry to be modified - * @lp: linpool from which to allocate attributes - * @max: maximum number of added temporary attribus - * - * This function is supposed to be called from make_tmp_attrs() and - * store_tmp_attrs() hooks before rte_make_tmp_attr() / rte_store_tmp_attr() - * functions. It allocates &ea_list with length for @max items for temporary - * attributes and puts it on top of eattrs stack. 
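The reworked rte_find() above returns the address of the link rather than the node itself, so a miss yields the list-end pointer and the caller can unlink or insert at that position without rescanning the chain (rte_recalculate() further down relies on this through before_old). A minimal self-contained sketch of the same pointer-to-pointer convention; struct item, find_slot() and unlink_slot() are hypothetical stand-ins, not names from the patch:

struct item { int key; struct item *next; };

/* Return the address of the link pointing at the first matching node,
 * or the address of the terminating NULL link when nothing matches. */
static struct item **
find_slot(struct item **head, int key)
{
  struct item **e = head;
  while (*e && ((*e)->key != key))
    e = &(*e)->next;
  return e;
}

/* Unlink the found node in place; no separate predecessor pointer is needed. */
static void
unlink_slot(struct item **slot)
{
  if (*slot)
    *slot = (*slot)->next;
}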
- */ -void -rte_init_tmp_attrs(rte *r, linpool *lp, uint max) -{ - struct ea_list *e = lp_alloc(lp, sizeof(struct ea_list) + max * sizeof(eattr)); - - e->next = r->attrs->eattrs; - e->flags = EALF_SORTED | EALF_TEMP; - e->count = 0; + rt_lock_source(e->rte.src); - r->attrs->eattrs = e; -} + if (e->rte.attrs->cached) + e->rte.attrs = rta_clone(e->rte.attrs); + else + e->rte.attrs = rta_lookup(e->rte.attrs); -/** - * rte_make_tmp_attr - make temporary eattr from private route fields - * @r: route entry to be modified - * @id: attribute ID - * @type: attribute type - * @val: attribute value (u32 or adata ptr) - * - * This function is supposed to be called from make_tmp_attrs() hook for - * each temporary attribute, after temporary &ea_list was initialized by - * rte_init_tmp_attrs(). It checks whether temporary attribute is supposed to - * be defined (based on route pflags) and if so then it fills &eattr field in - * preallocated temporary &ea_list on top of route @r eattrs stack. - * - * Note that it may require free &eattr in temporary &ea_list, so it must not be - * called more times than @max argument of rte_init_tmp_attrs(). - */ -void -rte_make_tmp_attr(rte *r, uint id, uint type, uintptr_t val) -{ - if (r->pflags & EA_ID_FLAG(id)) - { - ea_list *e = r->attrs->eattrs; - eattr *a = &e->attrs[e->count++]; - a->id = id; - a->type = type; - a->flags = 0; - - if (type & EAF_EMBEDDED) - a->u.data = (u32) val; - else - a->u.ptr = (struct adata *) val; - } + return e; } /** - * rte_store_tmp_attr - store temporary eattr to private route fields - * @r: route entry to be modified - * @id: attribute ID - * - * This function is supposed to be called from store_tmp_attrs() hook for - * each temporary attribute, after temporary &ea_list was initialized by - * rte_init_tmp_attrs(). It checks whether temporary attribute is defined in - * route @r eattrs stack, updates route pflags accordingly, undefines it by - * filling &eattr field in preallocated temporary &ea_list on top of the eattrs - * stack, and returns the value. Caller is supposed to store it in the - * appropriate private field. + * rte_free - delete a &rte + * @e: &struct rte_storage to be deleted + * @tab: the table which the rte belongs to * - * Note that it may require free &eattr in temporary &ea_list, so it must not be - * called more times than @max argument of rte_init_tmp_attrs() + * rte_free() deletes the given &rte from the routing table it's linked to. */ -uintptr_t -rte_store_tmp_attr(rte *r, uint id) -{ - ea_list *e = r->attrs->eattrs; - eattr *a = ea_find(e->next, id); - - if (a) - { - e->attrs[e->count++] = (struct eattr) { .id = id, .type = EAF_TYPE_UNDEF }; - r->pflags |= EA_ID_FLAG(id); - return (a->type & EAF_EMBEDDED) ? a->u.data : (uintptr_t) a->u.ptr; - } - else - { - r->pflags &= ~EA_ID_FLAG(id); - return 0; - } -} -/** - * rte_make_tmp_attrs - prepare route by adding all relevant temporary route attributes - * @r: route entry to be modified (may be replaced if COW) - * @lp: linpool from which to allocate attributes - * @old_attrs: temporary ref to old &rta (may be NULL) - * - * This function expands privately stored protocol-dependent route attributes - * to a uniform &eattr / &ea_list representation. It is essentially a wrapper - * around protocol make_tmp_attrs() hook, which does some additional work like - * ensuring that route @r is writable. - * - * The route @r may be read-only (with %REF_COW flag), in that case rw copy is - * obtained by rte_cow() and @r is replaced. 
If @rte is originally rw, it may be - * directly modified (and it is never copied). - * - * If the @old_attrs ptr is supplied, the function obtains another reference of - * old cached &rta, that is necessary in some cases (see rte_cow_rta() for - * details). It is freed by rte_store_tmp_attrs(), or manually by rta_free(). - * - * Generally, if caller ensures that @r is read-only (e.g. in route export) then - * it may ignore @old_attrs (and set it to NULL), but must handle replacement of - * @r. If caller ensures that @r is writable (e.g. in route import) then it may - * ignore replacement of @r, but it must handle @old_attrs. - */ void -rte_make_tmp_attrs(rte **r, linpool *lp, rta **old_attrs) +rte_free(struct rte_storage *e, rtable_private *tab) { - void (*make_tmp_attrs)(rte *r, linpool *lp); - make_tmp_attrs = (*r)->attrs->src->proto->make_tmp_attrs; - - if (!make_tmp_attrs) - return; - - /* We may need to keep ref to old attributes, will be freed in rte_store_tmp_attrs() */ - if (old_attrs) - *old_attrs = rta_is_cached((*r)->attrs) ? rta_clone((*r)->attrs) : NULL; - - *r = rte_cow_rta(*r, lp); - make_tmp_attrs(*r, lp); + rt_unlock_source(e->rte.src); + rta_free(e->rte.attrs); + sl_free(tab->rte_slab, e); } -/** - * rte_store_tmp_attrs - store temporary route attributes back to private route fields - * @r: route entry to be modified - * @lp: linpool from which to allocate attributes - * @old_attrs: temporary ref to old &rta - * - * This function stores temporary route attributes that were expanded by - * rte_make_tmp_attrs() back to private route fields and also undefines them. - * It is essentially a wrapper around protocol store_tmp_attrs() hook, which - * does some additional work like shortcut if there is no change and cleanup - * of @old_attrs reference obtained by rte_make_tmp_attrs(). - */ -static void -rte_store_tmp_attrs(rte *r, linpool *lp, rta *old_attrs) -{ - void (*store_tmp_attrs)(rte *rt, linpool *lp); - store_tmp_attrs = r->attrs->src->proto->store_tmp_attrs; - - if (!store_tmp_attrs) - return; - - ASSERT(!rta_is_cached(r->attrs)); - - /* If there is no new ea_list, we just skip the temporary ea_list */ - ea_list *ea = r->attrs->eattrs; - if (ea && (ea->flags & EALF_TEMP)) - r->attrs->eattrs = ea->next; - else - store_tmp_attrs(r, lp); - - /* Free ref we got in rte_make_tmp_attrs(), have to do rta_lookup() first */ - r->attrs = rta_lookup(r->attrs); - rta_free(old_attrs); -} - - static int /* Actually better or at least as good as */ rte_better(rte *new, rte *old) { @@ -513,20 +385,20 @@ rte_better(rte *new, rte *old) if (!rte_is_valid(new)) return 0; - if (new->pref > old->pref) + if (new->attrs->pref > old->attrs->pref) return 1; - if (new->pref < old->pref) + if (new->attrs->pref < old->attrs->pref) return 0; - if (new->attrs->src->proto->proto != old->attrs->src->proto->proto) + if (new->src->owner->class != old->src->owner->class) { /* * If the user has configured protocol preferences, so that two different protocols * have the same preference, try to break the tie by comparing addresses. Not too * useful, but keeps the ordering of routes unambiguous. 
*/ - return new->attrs->src->proto->proto > old->attrs->src->proto->proto; + return new->src->owner->class > old->src->owner->class; } - if (better = new->attrs->src->proto->rte_better) + if (better = new->src->owner->class->rte_better) return better(new, old); return 0; } @@ -539,268 +411,300 @@ rte_mergable(rte *pri, rte *sec) if (!rte_is_valid(pri) || !rte_is_valid(sec)) return 0; - if (pri->pref != sec->pref) + if (pri->attrs->pref != sec->attrs->pref) return 0; - if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto) + if (pri->src->owner->class != sec->src->owner->class) return 0; - if (mergable = pri->attrs->src->proto->rte_mergable) + if (mergable = pri->src->owner->class->rte_mergable) return mergable(pri, sec); return 0; } static void -rte_trace(struct channel *c, rte *e, int dir, char *msg) +rte_trace(const char *name, const rte *e, int dir, const char *msg) { - log(L_TRACE "%s.%s %c %s %N %s", - c->proto->name, c->name ?: "?", dir, msg, e->net->n.addr, - rta_dest_name(e->attrs->dest)); + log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s%s", + name, dir, msg, e->net, + e->src->private_id, e->src->global_id, e->stale_cycle, e->id, + rta_dest_name(e->attrs->dest), + rte_is_filtered(e) ? " (filtered)" : ""); } static inline void -rte_trace_in(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_in(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '>', msg); + rte_trace(c->in_req.name, e, '>', msg); } static inline void -rte_trace_out(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_out(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '<', msg); + rte_trace(c->out_req.name, e, '<', msg); +} + +static inline void +rt_rte_trace_in(uint flag, struct rt_import_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '>', msg); +} + +#if 0 +// seems to be unused at all +static inline void +rt_rte_trace_out(uint flag, struct rt_export_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '<', msg); +} +#endif + +static uint +rte_feed_count(net *n) +{ + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTES_OR_NULL(e))) + count++; + return count; +} + +static void +rte_feed_obtain(net *n, struct rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTES_OR_NULL(e))) + { + ASSERT_DIE(i < count); + feed[i++] = &e->rte; + } + ASSERT_DIE(i == count); } static rte * -export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent) +export_filter_(struct channel *c, rte *rt, linpool *pool, int silent) { struct proto *p = c->proto; const struct filter *filter = c->out_filter; - struct proto_stats *stats = &c->stats; - rte *rt; - int v; + struct channel_export_stats *stats = &c->export_stats; - rt = rt0; - *rt_free = NULL; + /* Do nothing if we have already rejected the route */ + if (silent && bmap_test(&c->export_reject_map, rt->id)) + goto reject_noset; - v = p->preexport ? p->preexport(p, &rt, pool) : 0; + int v = p->preexport ? 
p->preexport(c, rt) : 0; if (v < 0) { if (silent) - goto reject; + goto reject_noset; - stats->exp_updates_rejected++; + stats->updates_rejected++; if (v == RIC_REJECT) - rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); + channel_rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); goto reject; + } if (v > 0) { if (!silent) - rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); + channel_rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); goto accept; } - rte_make_tmp_attrs(&rt, pool, NULL); - v = filter && ((filter == FILTER_REJECT) || - (f_run(filter, &rt, pool, + (f_run(filter, rt, pool, (silent ? FF_SILENT : 0)) > F_ACCEPT)); if (v) { if (silent) goto reject; - stats->exp_updates_filtered++; - rte_trace_out(D_FILTERS, c, rt, "filtered out"); + stats->updates_filtered++; + channel_rte_trace_out(D_FILTERS, c, rt, "filtered out"); goto reject; } -#ifdef CONFIG_PIPE - /* Pipes need rte with stored tmpattrs, remaining protocols need expanded tmpattrs */ - if (p->proto == &proto_pipe) - rte_store_tmp_attrs(rt, pool, NULL); -#endif - accept: - if (rt != rt0) - *rt_free = rt; + /* We have accepted the route */ + bmap_clear(&c->export_reject_map, rt->id); return rt; reject: + /* We have rejected the route by filter */ + bmap_set(&c->export_reject_map, rt->id); + +reject_noset: /* Discard temporary rte */ - if (rt != rt0) - rte_free(rt); return NULL; } static inline rte * -export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent) +export_filter(struct channel *c, rte *rt, int silent) { - return export_filter_(c, rt0, rt_free, rte_update_pool, silent); + return export_filter_(c, rt, c->rte_update_pool, silent); } +void do_rt_notify_direct(struct channel *c, const net_addr *net, rte *new, const rte *old); + static void -do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed) +do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) { - struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; + struct channel_export_stats *stats = &c->export_stats; - if (refeed && new) + if (c->refeeding && new) c->refeed_count++; - /* Apply export limit */ - struct channel_limit *l = &c->out_limit; - if (l->action && !old && new) - { - if (stats->exp_routes >= l->limit) - channel_notify_limit(c, l, PLD_OUT, stats->exp_routes); - - if (l->state == PLS_BLOCKED) + if (!old && new) + if (CHANNEL_LIMIT_PUSH(c, OUT)) { - stats->exp_updates_rejected++; - rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); + stats->updates_rejected++; + channel_rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); return; } - } - /* Apply export table */ - if (c->out_table && !rte_update_out(c, net->n.addr, new, old, refeed)) - return; - - if (new) - stats->exp_updates_accepted++; - else - stats->exp_withdraws_accepted++; + if (!new && old) + CHANNEL_LIMIT_POP(c, OUT); + /* Store route export state */ if (old) - { bmap_clear(&c->export_map, old->id); - stats->exp_routes--; - } if (new) - { bmap_set(&c->export_map, new->id); - stats->exp_routes++; - } + + /* Apply export table */ + if (c->out_table) + rte_import(&c->out_table->push, net, new, old ? 
old->src : new->src); + else + do_rt_notify_direct(c, net, new, old); +} + +void +do_rt_notify_direct(struct channel *c, const net_addr *net, rte *new, const rte *old) +{ + struct proto *p = c->proto; + struct channel_export_stats *stats = &c->export_stats; + + if (new) + stats->updates_accepted++; + else + stats->withdraws_accepted++; if (p->debug & D_ROUTES) { if (new && old) - rte_trace_out(D_ROUTES, c, new, "replaced"); + channel_rte_trace_out(D_ROUTES, c, new, "replaced"); else if (new) - rte_trace_out(D_ROUTES, c, new, "added"); + channel_rte_trace_out(D_ROUTES, c, new, "added"); else if (old) - rte_trace_out(D_ROUTES, c, old, "removed"); + channel_rte_trace_out(D_ROUTES, c, old, "removed"); } p->rt_notify(p, c, net, new, old); } static void -rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed) +rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old) { - // struct proto *p = c->proto; - rte *new_free = NULL; - - if (new) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; - if (new) - new = export_filter(c, new, &new_free, 0); + new = export_filter(c, new, 0); if (old && !bmap_test(&c->export_map, old->id)) old = NULL; + if (old && (old->sender == c->in_req.hook)) + bug("bad-behaved pipe"); + if (!new && !old) return; - do_rt_notify(c, net, new, old, refeed); - - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); + do_rt_notify(c, net, new, old); } static void -rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed) +channel_rpe_mark_seen(struct rt_export_request *req, struct rt_pending_export *rpe) { - // struct proto *p = c->proto; - rte *new_best = NULL; - rte *old_best = NULL; - rte *new_free = NULL; - int new_first = 0; - - /* - * We assume that there are no changes in net route order except (added) - * new_changed and (removed) old_changed. Therefore, the function is not - * compatible with deterministic_med (where nontrivial reordering can happen - * as a result of a route change) and with recomputation of recursive routes - * due to next hop update (where many routes can be changed in one step). - * - * Note that we need this assumption just for optimizations, we could just - * run full new_best recomputation otherwise. 
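do_rt_notify() and rt_notify_basic() above record which route IDs were actually advertised in c->export_map, so a withdrawal of a route that never passed the export filter is suppressed. A rough self-contained sketch of that bookkeeping, with a plain word-array bitmap standing in for BIRD's bmap and all names below invented for the example:

#include <stdint.h>

#define ID_LIMIT 4096               /* arbitrary bound; ids assumed below it */
static uint64_t exported_ids[ID_LIMIT / 64];

static void id_set(uint32_t id)   { exported_ids[id / 64] |=  UINT64_C(1) << (id % 64); }
static void id_clear(uint32_t id) { exported_ids[id / 64] &= ~(UINT64_C(1) << (id % 64)); }
static int  id_test(uint32_t id)  { return !!(exported_ids[id / 64] & (UINT64_C(1) << (id % 64))); }

/* Withdraw only what was really advertised; remember what goes out now. */
static void
notify(uint32_t new_id, int have_new, uint32_t old_id, int have_old)
{
  if (have_old && !id_test(old_id))
    have_old = 0;                   /* old route never made it past the export filter */
  if (have_old)
    id_clear(old_id);
  if (have_new)
    id_set(new_id);
  /* ... hand the (new, old) pair to the protocol's rt_notify hook here ... */
}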
- * - * There are three cases: - * feed or old_best is old_changed -> we need to recompute new_best - * old_best is before new_changed -> new_best is old_best, ignore - * old_best is after new_changed -> try new_changed, otherwise old_best - */ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - if (net->routes) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rpe_mark_seen(req->hook, rpe); + if (rpe->old) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); +} - /* Find old_best - either old_changed, or route for net->routes */ - if (old_changed && bmap_test(&c->export_map, old_changed->id)) - old_best = old_changed; - else +void +rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + rte_update_lock(c); + + rte nb0, *new_best = NULL; + const rte *old_best = NULL; + + for (uint i = 0; i < count; i++) { - for (rte *r = net->routes; rte_is_valid(r); r = r->next) + if (!rte_is_valid(feed[i])) + continue; + + /* Has been already rejected, won't bother with it */ + if (!c->refeeding && bmap_test(&c->export_reject_map, feed[i]->id)) + continue; + + /* Previously exported */ + if (!old_best && bmap_test(&c->export_map, feed[i]->id)) { - if (bmap_test(&c->export_map, r->id)) + /* is still best */ + if (!new_best) { - old_best = r; - break; + DBG("rt_notify_accepted: idempotent\n"); + goto done; } - /* Note if new_changed found before old_best */ - if (r == new_changed) - new_first = 1; + /* is superseded */ + old_best = feed[i]; + break; } - } - /* Find new_best */ - if ((new_changed == old_changed) || (old_best == old_changed)) - { - /* Feed or old_best changed -> find first accepted by filters */ - for (rte *r = net->routes; rte_is_valid(r); r = r->next) - if (new_best = export_filter(c, r, &new_free, 0)) - break; + /* Have no new best route yet */ + if (!new_best) + { + /* Try this route not seen before */ + nb0 = *feed[i]; + new_best = export_filter(c, &nb0, 0); + DBG("rt_notify_accepted: checking route id %u: %s\n", feed[i]->id, new_best ? 
"ok" : "no"); + } } - else + +done: + /* Check obsolete routes for previously exported */ + while (rpe) { - /* Other cases -> either new_changed, or old_best (and nothing changed) */ - if (new_first && (new_changed = export_filter(c, new_changed, &new_free, 0))) - new_best = new_changed; - else - return; + channel_rpe_mark_seen(req, rpe); + if (rpe->old) + { + if (bmap_test(&c->export_map, rpe->old->rte.id)) + { + ASSERT_DIE(old_best == NULL); + old_best = &rpe->old->rte; + } + } + rpe = rpe_next(rpe, NULL); } - if (!new_best && !old_best) - return; - - do_rt_notify(c, net, new_best, old_best, refeed); + /* Nothing to export */ + if (new_best || old_best) + do_rt_notify(c, n, new_best, old_best); + else + DBG("rt_notify_accepted: nothing to export\n"); - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); + rte_update_unlock(c); } @@ -811,38 +715,45 @@ nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) } rte * -rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent) +rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool, int silent) { + _Thread_local static rte rloc; + // struct proto *p = c->proto; struct nexthop *nhs = NULL; - rte *best0, *best, *rt0, *rt, *tmp; - - best0 = net->routes; - *rt_free = NULL; + rte *best0 = feed[0]; + rte *best = NULL; if (!rte_is_valid(best0)) return NULL; - best = export_filter_(c, best0, rt_free, pool, silent); + /* Already rejected, no need to re-run the filter */ + if (!c->refeeding && bmap_test(&c->export_reject_map, best0->id)) + return NULL; + + rloc = *best0; + best = export_filter_(c, &rloc, pool, silent); + + if (!best) + /* Best route doesn't pass the filter */ + return NULL; - if (!best || !rte_is_reachable(best)) + if (!rte_is_reachable(best)) + /* Unreachable routes can't be merged */ return best; - for (rt0 = best0->next; rt0; rt0 = rt0->next) + for (uint i = 1; i < count; i++) { - if (!rte_mergable(best0, rt0)) + if (!rte_mergable(best0, feed[i])) continue; - rt = export_filter_(c, rt0, &tmp, pool, 1); + rte tmp0 = *feed[i]; + rte *tmp = export_filter_(c, &tmp0, pool, 1); - if (!rt) + if (!tmp || !rte_is_reachable(tmp)) continue; - if (rte_is_reachable(rt)) - nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit); - - if (tmp) - rte_free(tmp); + nhs = nexthop_merge_rta(nhs, tmp->attrs, pool, c->merge_limit); } if (nhs) @@ -851,66 +762,208 @@ rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int if (nhs->next) { - best = rte_cow_rta(best, pool); + best->attrs = rta_cow(best->attrs, pool); nexthop_link(best->attrs, nhs); } } - if (best != best0) - *rt_free = best; - return best; } - -static void -rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed, - rte *new_best, rte *old_best, int refeed) +void +rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) { - // struct proto *p = c->proto; - rte *new_free = NULL; + struct channel *c = SKIP_BACK(struct channel, out_req, req); - /* We assume that all rte arguments are either NULL or rte_is_valid() */ - - /* This check should be done by the caller */ - if (!new_best && !old_best) - return; + rte_update_lock(c); + // struct proto *p = c->proto; +#if 0 /* TODO: Find whether this check is possible when processing multiple changes at once. 
*/ /* Check whether the change is relevant to the merged route */ if ((new_best == old_best) && (new_changed != old_changed) && !rte_mergable(new_best, new_changed) && !rte_mergable(old_best, old_changed)) return; +#endif - if (new_best) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rte *old_best = NULL; + /* Find old best route */ + for (uint i = 0; i < count; i++) + if (bmap_test(&c->export_map, feed[i]->id)) + { + old_best = feed[i]; + break; + } + + /* Check obsolete routes for previously exported */ + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + if (rpe->old) + { + if (bmap_test(&c->export_map, rpe->old->rte.id)) + { + ASSERT_DIE(old_best == NULL); + old_best = &rpe->old->rte; + } + } + rpe = rpe_next(rpe, NULL); + } /* Prepare new merged route */ - if (new_best) - new_best = rt_export_merged(c, net, &new_free, rte_update_pool, 0); + rte *new_merged = count ? rt_export_merged(c, feed, count, c->rte_update_pool, 0) : NULL; - /* Check old merged route */ - if (old_best && !bmap_test(&c->export_map, old_best->id)) - old_best = NULL; + if (new_merged || old_best) + do_rt_notify(c, n, new_merged, old_best); - if (!new_best && !old_best) - return; + rte_update_unlock(c); +} - do_rt_notify(c, net, new_best, old_best, refeed); +void +rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte_update_lock(c); + rte *old = RTES_OR_NULL(rpe->old_best); + struct rte_storage *new_best = rpe->new_best; - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + new_best = rpe->new_best; + rpe = rpe_next(rpe, NULL); + } + + if (&new_best->rte != old) + { + rte n0, *new = RTES_CLONE(new_best, &n0); + rt_notify_basic(c, net, new, old); + } + + rte_update_unlock(c); +} + +void +rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte_update_lock(c); + struct rte_src *src = rpe->new ? rpe->new->rte.src : rpe->old->rte.src; + rte *old = RTES_OR_NULL(rpe->old); + struct rte_storage *new_any = rpe->new; + + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + new_any = rpe->new; + rpe = rpe_next(rpe, src); + } + + if (&new_any->rte != old) + { + rte n0, *new = RTES_CLONE(new_any, &n0); + rt_notify_basic(c, net, new, old); + } + + rte_update_unlock(c); +} + +void +rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte_update_lock(c); + + for (uint i=0; i<count; i++) + { + rte n0 = *feed[i]; + rt_notify_basic(c, net, &n0, NULL); + } + + rte_update_unlock(c); +} + +void +rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe) +{ + bmap_set(&hook->seq_map, rpe->seq); +} + +struct rt_pending_export * +rpe_next(struct rt_pending_export *rpe, struct rte_src *src) +{ + struct rt_pending_export *next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + if (!next) + return NULL; + + if (!src) + return next; + + while (rpe = next) + if (src == (rpe->new ? 
rpe->new->rte.src : rpe->old->rte.src)) + return rpe; + else + next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + return NULL; } +static struct rt_pending_export * rt_next_export_fast(struct rt_pending_export *last); +static void +rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) +{ + if (bmap_test(&hook->seq_map, rpe->seq)) + goto seen; + + const net_addr *n = rpe->new_best ? rpe->new_best->rte.net : rpe->old_best->rte.net; + + if (rpe->new) + hook->stats.updates_received++; + else + hook->stats.withdraws_received++; + + if (hook->req->export_one) + hook->req->export_one(hook->req, n, rpe); + else if (hook->req->export_bulk) + { + RT_LOCK(hook->table); + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + uint count = rte_feed_count(net); + rte **feed = NULL; + if (count) + { + feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + } + RT_UNLOCK(hook->table); + hook->req->export_bulk(hook->req, n, rpe, feed, count); + } + else + bug("Export request must always provide an export method"); + +seen: + /* Get the next export if exists */ + hook->rpe_next = rt_next_export_fast(rpe); + + /* The last block may be available to free */ + if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe)) + { + RT_LOCK(hook->table); + rt_export_used(RT_PRIV(hook->table)); + RT_UNLOCK(hook->table); + } + + /* Releasing this export for cleanup routine */ + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq); + atomic_store_explicit(&hook->last_export, rpe, memory_order_release); +} /** * rte_announce - announce a routing table change * @tab: table the route has been added to - * @type: type of route announcement (RA_UNDEF or RA_ANY) * @net: network in question * @new: the new or changed route * @old: the previous route replaced by the new one @@ -926,13 +979,6 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * and @new_best and @old_best describes best routes. Other routes are not * affected, but in sorted table the order of other routes might change. * - * Second, There is a bulk change of multiple routes in @net, with shared best - * route selection. In such case separate route changes are described using - * @type of %RA_ANY, with @new and @old specifying the changed route, while - * @new_best and @old_best are NULL. After that, another notification is done - * where @new_best and @old_best are filled (may be the same), but @new and @old - * are NULL. - * * The function announces the change to all associated channels. For each * channel, an appropriate preprocessing is done according to channel &ra_mode. * For example, %RA_OPTIMAL channels receive just changes of best routes. @@ -947,130 +993,304 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * done outside of scope of rte_announce(). 
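When a request provides export_bulk, rte_export() above gathers all valid routes of the network into a temporary feed array: the valid entries are counted first, the array is placed on the stack, then filled (rte_feed_count() and rte_feed_obtain() earlier in this file). A simplified stand-alone version of that two-pass pattern, with hypothetical types:

#include <alloca.h>
#include <assert.h>
#include <stddef.h>

struct route { int valid; struct route *next; };

/* First pass: count the valid entries in the chain. */
static unsigned
feed_count(const struct route *chain)
{
  unsigned n = 0;
  for (const struct route *r = chain; r; r = r->next)
    if (r->valid)
      n++;
  return n;
}

/* Second pass: fill a caller-provided array of exactly that size. */
static void
feed_obtain(struct route *chain, struct route **feed, unsigned count)
{
  unsigned i = 0;
  for (struct route *r = chain; r; r = r->next)
    if (r->valid)
    {
      assert(i < count);
      feed[i++] = r;
    }
  assert(i == count);
}

static void
export_network(struct route *chain)
{
  struct route **feed = NULL;
  unsigned count = feed_count(chain);
  if (count)
  {
    feed = alloca(count * sizeof(struct route *));
    feed_obtain(chain, feed, count);
  }
  /* ... hand (feed, count) to the bulk export callback ... */
  (void) feed;
}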
*/ static void -rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old, - rte *new_best, rte *old_best) +rte_announce(rtable_private *tab, net *net, struct rte_storage *new, struct rte_storage *old, + struct rte_storage *new_best, struct rte_storage *old_best) { - if (!rte_is_valid(new)) - new = NULL; - - if (!rte_is_valid(old)) - old = NULL; - - if (!rte_is_valid(new_best)) + if (!new_best || !rte_is_valid(&new_best->rte)) new_best = NULL; - if (!rte_is_valid(old_best)) + if (!old_best || !rte_is_valid(&old_best->rte)) old_best = NULL; - if (!new && !old && !new_best && !old_best) + if (!new || !rte_is_valid(&new->rte)) + new = NULL; + + if (old && !rte_is_valid(&old->rte)) + { + /* Filtered old route isn't announced, should be freed immediately. */ + rte_free(old, tab); + old = NULL; + } + + if ((new == old) && (new_best == old_best)) return; if (new_best != old_best) { if (new_best) - new_best->sender->stats.pref_routes++; + new_best->rte.sender->stats.pref++; if (old_best) - old_best->sender->stats.pref_routes--; + old_best->rte.sender->stats.pref--; if (tab->hostcache) rt_notify_hostcache(tab, net); } + if (EMPTY_LIST(tab->exports) && EMPTY_LIST(tab->pending_exports)) + { + /* No export hook and no pending exports to cleanup. We may free the route immediately. */ + if (!old) + return; + + hmap_clear(&tab->id_map, old->rte.id); + rte_free(old, tab); + return; + } + + /* Get the pending export structure */ + struct rt_export_block *rpeb = NULL, *rpebsnl = NULL; + u32 end = 0; + + if (!EMPTY_LIST(tab->pending_exports)) + { + rpeb = TAIL(tab->pending_exports); + end = atomic_load_explicit(&rpeb->end, memory_order_relaxed); + if (end >= RT_PENDING_EXPORT_ITEMS) + { + ASSERT_DIE(end == RT_PENDING_EXPORT_ITEMS); + rpebsnl = rpeb; + + rpeb = NULL; + end = 0; + } + } + + if (!rpeb) + { + rpeb = alloc_page(); + *rpeb = (struct rt_export_block) {}; + add_tail(&tab->pending_exports, &rpeb->n); + } + + /* Fill the pending export */ + struct rt_pending_export *rpe = &rpeb->export[rpeb->end]; + *rpe = (struct rt_pending_export) { + .new = new, + .new_best = new_best, + .old = old, + .old_best = old_best, + .seq = tab->next_export_seq++, + }; + + DBG("rte_announce: table=%s net=%N new=%p from %p old=%p from %p new_best=%p old_best=%p seq=%lu\n", tab->name, net->n.addr, new, new ? new->sender : NULL, old, old ? old->sender : NULL, new_best, old_best, rpe->seq); + + ASSERT_DIE(atomic_fetch_add_explicit(&rpeb->end, 1, memory_order_release) == end); + + if (rpebsnl) + { + _Bool f = 0; + ASSERT_DIE(atomic_compare_exchange_strong_explicit(&rpebsnl->not_last, &f, 1, + memory_order_release, memory_order_relaxed)); + } + + /* Append to the same-network squasher list */ + if (net->last) + { + struct rt_pending_export *rpenull = NULL; + ASSERT_DIE(atomic_compare_exchange_strong_explicit( + &net->last->next, &rpenull, rpe, + memory_order_relaxed, + memory_order_relaxed)); + + } + + net->last = rpe; + + if (!net->first) + net->first = rpe; + + if (tab->first_export == NULL) + tab->first_export = rpe; + + if (!EMPTY_LIST(tab->exports) && + (tab->first_export->seq + tab->config->cork_limit <= tab->next_export_seq) && + !tab->cork_active) + { + if (config->table_debug) + log(L_TRACE "%s: cork activated", tab->name); + + ev_cork(&rt_cork); + tab->cork_active = 1; + } +} + +static struct rt_pending_export * +rt_next_export_fast(struct rt_pending_export *last) +{ + /* Get the whole export block and find our position in there. 
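rte_announce() above appends each change into a page-sized rt_export_block and publishes it with a release-ordered bump of the atomic end counter; readers such as rt_next_export_fast() below load end with acquire semantics and step into the next block only after not_last is set. A small self-contained sketch of that single-writer publish/consume pattern; the block size, payload type and function names are illustrative only:

#include <stdatomic.h>
#include <stdint.h>

#define BLOCK_ITEMS 128

struct journal_block {
  _Atomic uint32_t end;             /* number of published entries */
  _Atomic _Bool not_last;           /* set once a successor block exists */
  uint64_t item[BLOCK_ITEMS];
};

/* Single writer (the table holds its lock here): fill the slot first, then
 * publish it, so a reader that observes the new end also sees the payload. */
static int
journal_push(struct journal_block *b, uint64_t value)
{
  uint32_t end = atomic_load_explicit(&b->end, memory_order_relaxed);
  if (end >= BLOCK_ITEMS)
    return 0;                       /* caller allocates a new block and sets not_last */
  b->item[end] = value;
  atomic_store_explicit(&b->end, end + 1, memory_order_release);
  return 1;
}

/* Reader: everything below the acquired end is safe to read. */
static uint32_t
journal_read(struct journal_block *b, uint32_t from, uint64_t *out)
{
  uint32_t end = atomic_load_explicit(&b->end, memory_order_acquire);
  uint32_t n = 0;
  for (uint32_t i = from; i < end; i++)
    out[n++] = b->item[i];
  return n;
}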
*/ + struct rt_export_block *rpeb = PAGE_HEAD(last); + u32 pos = (last - &rpeb->export[0]); + u32 end = atomic_load_explicit(&rpeb->end, memory_order_acquire); + ASSERT_DIE(pos < end); + + /* Next is in the same block. */ + if (++pos < end) + return &rpeb->export[pos]; + + /* There is another block. */ + if (atomic_load_explicit(&rpeb->not_last, memory_order_acquire)) + { + /* This is OK to do non-atomically because of the not_last flag. */ + rpeb = NODE_NEXT(rpeb); + return &rpeb->export[0]; + } + + /* There is nothing more. */ + return NULL; +} + +static struct rt_pending_export * +rt_next_export(struct rt_export_hook *hook, rtable_private *tab) +{ + /* As the table is locked, it is safe to reload the last export pointer */ + struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire); + + /* It is still valid, let's reuse it */ + if (last) + return rt_next_export_fast(last); + + /* No, therefore we must process the table's first pending export */ + else + return tab->first_export; +} + +static inline void +rt_send_export_event(struct rt_export_hook *hook) +{ + ev_send(hook->req->list, hook->event); +} + +static void +rt_announce_exports(void *data) +{ + rtable_private *tab = data; + ASSERT_DIE(birdloop_inside(tab->loop)); + rt_schedule_notify(tab); - struct channel *c; node *n; - WALK_LIST2(c, n, tab->channels, table_node) + struct rt_export_hook *c; node *n; + WALK_LIST2(c, n, tab->exports, n) { - if (c->export_state == ES_DOWN) + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) continue; - if (type && (type != c->ra_mode)) - continue; + rt_send_export_event(c); + } +} - switch (c->ra_mode) +static void +rt_import_announce_exports(void *data) +{ + struct rt_import_hook *hook = data; + RT_LOCKED(hook->table, tab) + { + if (hook->import_state == TIS_CLEARED) { - case RA_OPTIMAL: - if (new_best != old_best) - rt_notify_basic(c, net, new_best, old_best, 0); - break; + rfree(hook->export_announce_event); - case RA_ANY: - if (new != old) - rt_notify_basic(c, net, new, old, 0); - break; + ev_send(hook->stopped->list, hook->stopped); + rem_node(&hook->n); + mb_free(hook); + rt_unlock_table(tab); + } + else + ev_send_loop(tab->loop, tab->announce_event); + } +} - case RA_ACCEPTED: - rt_notify_accepted(c, net, new, old, 0); - break; +static struct rt_pending_export * +rt_last_export(rtable_private *tab) +{ + struct rt_pending_export *rpe = NULL; - case RA_MERGED: - rt_notify_merged(c, net, new, old, new_best, old_best, 0); - break; + if (!EMPTY_LIST(tab->pending_exports)) + { + /* We'll continue processing exports from this export on */ + struct rt_export_block *reb = TAIL(tab->pending_exports); + ASSERT_DIE(reb->end); + rpe = &reb->export[reb->end - 1]; + } + + return rpe; +} + +#define RT_EXPORT_BULK 1024 + +static void +rt_export_hook(void *_data) +{ + struct rt_export_hook *c = _data; + + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_READY); + + if (!c->rpe_next) + { + RT_LOCK(c->table); + c->rpe_next = rt_next_export(c, RT_PRIV(c->table)); + + if (!c->rpe_next) + { + rt_export_used(RT_PRIV(c->table)); + RT_UNLOCK(c->table); + return; } + + RT_UNLOCK(c->table); + } + + /* Process the export */ + for (uint i=0; i<RT_EXPORT_BULK; i++) + { + rte_export(c, c->rpe_next); + + if (!c->rpe_next) + break; } + + rt_send_export_event(c); } + static inline int -rte_validate(rte *e) +rte_validate(struct channel *ch, rte *e) { int c; - net *n = e->net; + const net_addr *n = e->net; - if 
(!net_validate(n->n.addr)) + if (!net_validate(n)) { log(L_WARN "Ignoring bogus prefix %N received via %s", - n->n.addr, e->sender->proto->name); + n, ch->proto->name); return 0; } /* FIXME: better handling different nettypes */ - c = !net_is_flow(n->n.addr) ? - net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE); + c = !net_is_flow(n) ? + net_classify(n): (IADDR_HOST | SCOPE_UNIVERSE); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) { log(L_WARN "Ignoring bogus route %N received via %s", - n->n.addr, e->sender->proto->name); + n, ch->proto->name); return 0; } - if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest) + if (net_type_match(n, NB_DEST) == !e->attrs->dest) { log(L_WARN "Ignoring route %N with invalid dest %d received via %s", - n->n.addr, e->attrs->dest, e->sender->proto->name); + n, e->attrs->dest, ch->proto->name); return 0; } if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) { log(L_WARN "Ignoring unsorted multipath route %N received via %s", - n->n.addr, e->sender->proto->name); + n, ch->proto->name); return 0; } return 1; } -/** - * rte_free - delete a &rte - * @e: &rte to be deleted - * - * rte_free() deletes the given &rte from the routing table it's linked to. - */ -void -rte_free(rte *e) -{ - if (rta_is_cached(e->attrs)) - rta_free(e->attrs); - sl_free(rte_slab, e); -} - -static inline void -rte_free_quick(rte *e) -{ - rta_free(e->attrs); - sl_free(rte_slab, e); -} - static int rte_same(rte *x, rte *y) { @@ -1078,175 +1298,105 @@ rte_same(rte *x, rte *y) return x->attrs == y->attrs && x->pflags == y->pflags && - x->pref == y->pref && - (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y)) && + x->src == y->src && rte_is_filtered(x) == rte_is_filtered(y); } static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } static void -rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) +rte_recalculate(rtable_private *table, struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) { - struct proto *p = c->proto; - struct rtable *table = c->table; - struct proto_stats *stats = &c->stats; - static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS; - rte *before_old = NULL; - rte *old_best = net->routes; + struct rt_import_request *req = c->req; + struct rt_import_stats *stats = &c->stats; + struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; + rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; rte *old = NULL; - rte **k; - k = &net->routes; /* Find and remove original route from the same protocol */ - while (old = *k) + /* Set the stale cycle unless already set */ + if (new && !(new->flags & REF_USE_STALE)) + new->stale_cycle = c->stale_set; + + /* Find and remove original route from the same protocol */ + struct rte_storage **before_old = rte_find(net, src); + + if (*before_old) { - if (old->attrs->src == src) + old = &(old_stored = (*before_old))->rte; + + /* If there is the same route in the routing table but from + * a different sender, then there are two paths from the + * source protocol to this routing table through transparent + * pipes, which is not allowed. + * We log that and ignore the route. */ + if (old->sender != c) { - /* If there is the same route in the routing table but from - * a different sender, then there are two paths from the - * source protocol to this routing table through transparent - * pipes, which is not allowed. - * - * We log that and ignore the route. 
If it is withdraw, we - * ignore it completely (there might be 'spurious withdraws', - * see FIXME in do_rte_announce()) - */ - if (old->sender->proto != p) - { - if (new) - { - log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s", - net->n.addr, table->name); - rte_free_quick(new); - } - return; - } + if (!old->generation && !new->generation) + bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); + + log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); + } if (new && rte_same(old, new)) { /* No changes, ignore the new route and refresh the old one */ - - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); + old->stale_cycle = new->stale_cycle; if (!rte_is_filtered(new)) { - stats->imp_updates_ignored++; - rte_trace_in(D_ROUTES, c, new, "ignored"); + stats->updates_ignored++; + rt_rte_trace_in(D_ROUTES, req, new, "ignored"); } - - rte_free_quick(new); return; - } - *k = old->next; - table->rt_count--; - break; - } - k = &old->next; - before_old = old; - } - - /* Save the last accessed position */ - rte **pos = k; + } - if (!old) - before_old = NULL; + *before_old = (*before_old)->next; + table->rt_count--; + } if (!old && !new) { - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; return; } + if (req->preimport) + new = req->preimport(req, new, old); + int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); - struct channel_limit *l = &c->rx_limit; - if (l->action && !old && new && !c->in_table) - { - u32 all_routes = stats->imp_routes + stats->filt_routes; - - if (all_routes >= l->limit) - channel_notify_limit(c, l, PLD_RX, all_routes); - - if (l->state == PLS_BLOCKED) - { - /* In receive limit the situation is simple, old is NULL so - we just free new and exit like nothing happened */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - rte_free_quick(new); - return; - } - } - - l = &c->in_limit; - if (l->action && !old_ok && new_ok) - { - if (stats->imp_routes >= l->limit) - channel_notify_limit(c, l, PLD_IN, stats->imp_routes); - - if (l->state == PLS_BLOCKED) - { - /* In import limit the situation is more complicated. We - shouldn't just drop the route, we should handle it like - it was filtered. We also have to continue the route - processing if old or new is non-NULL, but we should exit - if both are NULL as this case is probably assumed to be - already handled. */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - - if (c->in_keep_filtered) - new->flags |= REF_FILTERED; - else - { rte_free_quick(new); new = NULL; } - - /* Note that old && !new could be possible when - c->in_keep_filtered changed in the recent past. */ - - if (!old && !new) - return; - - new_ok = 0; - goto skip_stats1; - } - } - if (new_ok) - stats->imp_updates_accepted++; + stats->updates_accepted++; else if (old_ok) - stats->imp_withdraws_accepted++; + stats->withdraws_accepted++; else - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; if (old_ok || new_ok) table->last_rt_change = current_time(); - skip_stats1: - - if (new) - rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++; - if (old) - rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--; + struct rte_storage *new_stored = new ? 
rte_store(new, net, table) : NULL; if (table->config->sorted) { /* If routes are sorted, just insert new route to appropriate position */ - if (new) + if (new_stored) { - if (before_old && !rte_better(new, before_old)) - k = &before_old->next; + struct rte_storage **k; + if ((before_old != &net->routes) && !rte_better(new, &SKIP_BACK(struct rte_storage, next, before_old)->rte)) + k = before_old; else k = &net->routes; for (; *k; k=&(*k)->next) - if (rte_better(new, *k)) + if (rte_better(new, &(*k)->rte)) break; - new->next = *k; - *k = new; + new_stored->next = *k; + *k = new_stored; table->rt_count++; } @@ -1256,16 +1406,17 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) /* If routes are not sorted, find the best route and move it on the first position. There are several optimized cases. */ - if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best)) + if (src->owner->rte_recalculate && + src->owner->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best)) goto do_recalculate; - if (new && rte_better(new, old_best)) + if (new_stored && rte_better(&new_stored->rte, old_best)) { /* The first case - the new route is cleary optimal, we link it at the first position */ - new->next = net->routes; - net->routes = new; + new_stored->next = net->routes; + net->routes = new_stored; table->rt_count++; } @@ -1279,10 +1430,10 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) do_recalculate: /* Add the new route to the list */ - if (new) + if (new_stored) { - new->next = *pos; - *pos = new; + new_stored->next = *before_old; + *before_old = new_stored; table->rt_count++; } @@ -1290,335 +1441,390 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) /* Find a new optimal route (if there is any) */ if (net->routes) { - rte **bp = &net->routes; - for (k=&(*bp)->next; *k; k=&(*k)->next) - if (rte_better(*k, *bp)) + struct rte_storage **bp = &net->routes; + for (struct rte_storage **k=&(*bp)->next; *k; k=&(*k)->next) + if (rte_better(&(*k)->rte, &(*bp)->rte)) bp = k; /* And relink it */ - rte *best = *bp; + struct rte_storage *best = *bp; *bp = best->next; best->next = net->routes; net->routes = best; } } - else if (new) + else if (new_stored) { /* The third case - the new route is not better than the old best route (therefore old_best != NULL) and the old best route was not removed (therefore old_best == net->routes). We just link the new route to the old/last position. */ - new->next = *pos; - *pos = new; + new_stored->next = *before_old; + *before_old = new_stored; table->rt_count++; } /* The fourth (empty) case - suboptimal route was removed, nothing to do */ } - if (new) + if (new_stored) { - new->lastmod = current_time(); - - if (!old) - { - new->id = hmap_first_zero(&table->id_map); - hmap_set(&table->id_map, new->id); - } - else - new->id = old->id; + new_stored->rte.lastmod = current_time(); + new_stored->rte.id = hmap_first_zero(&table->id_map); + hmap_set(&table->id_map, new_stored->rte.id); } + _Bool nb = (new_stored == net->routes); + _Bool ob = (old_best == old); + /* Log the route change */ - if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES)) + if (new_ok && old_ok) { - if (new_ok) - rte_trace(c, new, '>', new == net->routes ? 
"added [best]" : "added"); - else if (old_ok) - { - if (old != old_best) - rte_trace(c, old, '>', "removed"); - else if (rte_is_ok(net->routes)) - rte_trace(c, old, '>', "removed [replaced]"); - else - rte_trace(c, old, '>', "removed [sole]"); - } + const char *best_indicator[2][2] = { { "updated", "updated [-best]" }, { "updated [+best]", "updated [best]" } }; + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, best_indicator[nb][ob]); } + else if (new_ok) + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, + (!net->routes->next || !rte_is_ok(&net->routes->next->rte)) ? "added [sole]" : + nb ? "added [best]" : "added"); + else if (old_ok) + rt_rte_trace_in(D_ROUTES, req, old, + (!net->routes || !rte_is_ok(&net->routes->rte)) ? "removed [sole]" : + ob ? "removed [best]" : "removed"); /* Propagate the route change */ - rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best); + rte_announce(table, net, new_stored, old_stored, + net->routes, old_best_stored); + + ev_send(req->list, c->export_announce_event); if (!net->routes && (table->gc_counter++ >= table->config->gc_max_ops) && (table->gc_time + table->config->gc_min_time <= current_time())) rt_schedule_prune(table); +#if 0 + /* Enable and reimplement these callbacks if anybody wants to use them */ if (old_ok && p->rte_remove) p->rte_remove(net, old); if (new_ok && p->rte_insert) - p->rte_insert(net, new); - - if (old) - { - if (!new) - hmap_clear(&table->id_map, old->id); + p->rte_insert(net, &new_stored->rte); +#endif - rte_free_quick(old); - } } -static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */ - -static inline void -rte_update_lock(void) -{ - rte_update_nest_cnt++; -} - -static inline void -rte_update_unlock(void) +rte * +channel_preimport(struct rt_import_request *req, rte *new, rte *old) { - if (!--rte_update_nest_cnt) - lp_flush(rte_update_pool); -} + struct channel *c = SKIP_BACK(struct channel, in_req, req); -static inline void -rte_hide_dummy_routes(net *net, rte **dummy) -{ - if (net->routes && net->routes->attrs->source == RTS_DUMMY) + if (!c->in_table) { - *dummy = net->routes; - net->routes = (*dummy)->next; + if (new && !old) + if (CHANNEL_LIMIT_PUSH(c, RX)) + return NULL; + + if (!new && old) + CHANNEL_LIMIT_POP(c, RX); } + + int new_in = new && !rte_is_filtered(new); + int old_in = old && !rte_is_filtered(old); + + if (new_in && !old_in) + if (CHANNEL_LIMIT_PUSH(c, IN)) + if (c->in_keep_filtered) + { + new->flags |= REF_FILTERED; + return new; + } + else + return NULL; + + if (!new_in && old_in) + CHANNEL_LIMIT_POP(c, IN); + + return new; } -static inline void -rte_unhide_dummy_routes(net *net, rte **dummy) +rte * +channel_in_preimport(struct rt_import_request *req, rte *new, rte *old) { - if (*dummy) - { - (*dummy)->next = net->routes; - net->routes = *dummy; - } + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, push, req); + + if (new && !old) + if (CHANNEL_LIMIT_PUSH(cat->c, RX)) + return NULL; + + if (!new && old) + CHANNEL_LIMIT_POP(cat->c, RX); + + return new; } -/** - * rte_update - enter a new update to a routing table - * @table: table to be updated - * @c: channel doing the update - * @net: network node - * @p: protocol submitting the update - * @src: protocol originating the update - * @new: a &rte representing the new route or %NULL for route removal. - * - * This function is called by the routing protocols whenever they discover - * a new route or wish to update/remove an existing route. 
The right announcement - * sequence is to build route attributes first (either un-cached with @aflags set - * to zero or a cached one using rta_lookup(); in this case please note that - * you need to increase the use count of the attributes yourself by calling - * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all - * the appropriate data and finally submit the new &rte by calling rte_update(). - * - * @src specifies the protocol that originally created the route and the meaning - * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the - * same value as @new->attrs->proto. @p specifies the protocol that called - * rte_update(). In most cases it is the same protocol as @src. rte_update() - * stores @p in @new->sender; - * - * When rte_update() gets any route, it automatically validates it (checks, - * whether the network and next hop address are valid IP addresses and also - * whether a normal routing protocol doesn't try to smuggle a host or link - * scope route to the table), converts all protocol dependent attributes stored - * in the &rte to temporary extended attributes, consults import filters of the - * protocol to see if the route should be accepted and/or its attributes modified, - * stores the temporary attributes back to the &rte. - * - * Now, having a "public" version of the route, we - * automatically find any old route defined by the protocol @src - * for network @n, replace it by the new one (or removing it if @new is %NULL), - * recalculate the optimal route for this destination and finally broadcast - * the change (if any) to all routing protocols by calling rte_announce(). - * - * All memory used for attribute lists and other temporary allocations is taken - * from a special linear pool @rte_update_pool and freed when rte_update() - * finishes. - */ +void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); void -rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) +rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { - // struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; - const struct filter *filter = c->in_filter; - rte *dummy = NULL; - net *nn; + if (!c->in_req.hook) + return; ASSERT(c->channel_state == CS_UP); - rte_update_lock(); + if (c->in_table) + rte_import(&c->in_table->push, n, new, src); + else + rte_update_direct(c, n, new, src); +} + +void +rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) +{ + const struct filter *filter = c->in_filter; + struct channel_import_stats *stats = &c->import_stats; + + rte_update_lock(c); if (new) { - /* Create a temporary table node */ - nn = alloca(sizeof(net) + n->length); - memset(nn, 0, sizeof(net) + n->length); - net_copy(nn->n.addr, n); - - new->net = nn; - new->sender = c; + new->net = n; - if (!new->pref) - new->pref = c->preference; + int fr; - stats->imp_updates_received++; - if (!rte_validate(new)) + stats->updates_received++; + if (!rte_validate(c, new)) { - rte_trace_in(D_FILTERS, c, new, "invalid"); - stats->imp_updates_invalid++; - goto drop; + channel_rte_trace_in(D_FILTERS, c, new, "invalid"); + stats->updates_invalid++; + new = NULL; } - - if (filter == FILTER_REJECT) + else if ((filter == FILTER_REJECT) || + ((fr = f_run(filter, new, c->rte_update_pool, 0)) > F_ACCEPT)) { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); - - if (! 
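channel_preimport() above applies the receive and import limits through CHANNEL_LIMIT_PUSH() and CHANNEL_LIMIT_POP(); judging by the call sites, a push counts the route against a configured maximum and reports failure when it would be exceeded, and a pop releases the slot again. The macros themselves are defined elsewhere in this branch, so the sketch below only mirrors those apparent semantics with a plain counter:

struct limit { unsigned count, max; };  /* max == 0 taken as "unlimited" here */

/* Count a route in; a nonzero return means the limit is hit and the caller
 * drops the route or keeps it flagged as filtered, as channel_preimport() does. */
static int
limit_push(struct limit *l)
{
  if (l->max && (l->count >= l->max))
    return 1;
  l->count++;
  return 0;
}

/* Release the slot when the corresponding route is withdrawn again. */
static void
limit_pop(struct limit *l)
{
  if (l->count)
    l->count--;
}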
c->in_keep_filtered) - goto drop; + stats->updates_filtered++; + channel_rte_trace_in(D_FILTERS, c, new, "filtered out"); - /* new is a private copy, i could modify it */ - new->flags |= REF_FILTERED; + if (c->in_keep_filtered) + new->flags |= REF_FILTERED; + else + new = NULL; } - else if (filter) - { - rta *old_attrs = NULL; - rte_make_tmp_attrs(&new, rte_update_pool, &old_attrs); + } + else + stats->withdraws_received++; - int fr = f_run(filter, &new, rte_update_pool, 0); - if (fr > F_ACCEPT) - { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); + rte_import(&c->in_req, n, new, src); - if (! c->in_keep_filtered) - { - rta_free(old_attrs); - goto drop; - } + rte_update_unlock(c); +} - new->flags |= REF_FILTERED; - } +void +rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rte_src *src) +{ + struct rt_import_hook *hook = req->hook; + if (!hook) + return; - rte_store_tmp_attrs(new, rte_update_pool, old_attrs); - } - if (!rta_is_cached(new->attrs)) /* Need to copy attributes */ - new->attrs = rta_lookup(new->attrs); - new->flags |= REF_COW; + RT_LOCK(hook->table); + rtable_private *tab = RT_PRIV(hook->table); + net *nn; + if (new) + { /* Use the actual struct network, not the dummy one */ - nn = net_get(c->table, n); - new->net = nn; + nn = net_get(tab, n); + new->net = nn->n.addr; + new->sender = hook; } - else + else if (!(nn = net_find(tab, n))) { - stats->imp_withdraws_received++; - - if (!(nn = net_find(c->table, n)) || !src) - { - stats->imp_withdraws_ignored++; - rte_update_unlock(); - return; - } + req->hook->stats.withdraws_ignored++; + RT_UNLOCK(tab); + return; } - recalc: /* And recalculate the best route */ - rte_hide_dummy_routes(nn, &dummy); - rte_recalculate(c, nn, new, src); - rte_unhide_dummy_routes(nn, &dummy); + rte_recalculate(tab, hook, nn, new, src); + RT_UNLOCK(tab); +} + +/* Check rtable for best route to given net whether it would be exported do p */ +int +rt_examine(rtable_private *t, net_addr *a, struct channel *c, const struct filter *filter) +{ + net *n = net_find(t, a); + + if (!n || !n->routes) + return 0; + + rte rt = n->routes->rte; + + if (!rte_is_valid(&rt)) + return 0; - rte_update_unlock(); - return; + rte_update_lock(c); - drop: - rte_free(new); - new = NULL; - if (nn = net_find(c->table, n)) - goto recalc; + /* Rest is stripped down export_filter() */ + int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0; + if (v == RIC_PROCESS) + v = (f_run(filter, &rt, c->rte_update_pool, FF_SILENT) <= F_ACCEPT); - rte_update_unlock(); + rte_update_unlock(c); + + return v > 0; } -/* Independent call to rte_announce(), used from next hop - recalculation, outside of rte_update(). new must be non-NULL */ -static inline void -rte_announce_i(rtable *tab, uint type, net *net, rte *new, rte *old, - rte *new_best, rte *old_best) +static void +rt_export_stopped(void *data) { - rte_update_lock(); - rte_announce(tab, type, net, new, old, new_best, old_best); - rte_update_unlock(); + struct rt_export_hook *hook = data; + + RT_LOCKED(hook->table, tab) + { + /* Drop pending exports */ + rt_export_used(tab); + + /* Unlist */ + rem_node(&hook->n); + } + + /* Report the channel as stopped. */ + hook->stopped(hook->req); + + RT_LOCKED(hook->table, tab) + { + /* Free the hook together with its coroutine. 
*/ + rp_free(hook->pool, tab->rp); + rt_unlock_table(tab); + + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); + } } + static inline void -rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collection */ +rt_set_import_state(struct rt_import_hook *hook, u8 state) { - rte_update_lock(); - rte_recalculate(old->sender, old->net, NULL, old->attrs->src); - rte_update_unlock(); + hook->last_state_change = current_time(); + hook->import_state = state; + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); } -/* Modify existing route by protocol hook, used for long-lived graceful restart */ static inline void -rte_modify(rte *old) +rt_set_export_state(struct rt_export_hook *hook, u8 state) { - rte_update_lock(); + hook->last_state_change = current_time(); + atomic_store_explicit(&hook->export_state, state, memory_order_release); - rte *new = old->sender->proto->rte_modify(old, rte_update_pool); - if (new != old) - { - if (new) - { - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - new->flags = (old->flags & ~REF_MODIFY) | REF_COW; - } + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} - rte_recalculate(old->sender, old->net, new, old->attrs->src); - } +void +rt_request_import(rtable *t, struct rt_import_request *req) +{ + RT_LOCK(t); + rtable_private *tab = RT_PRIV(t); + rt_lock_table(tab); + + struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); + + DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); + + hook->req = req; + hook->table = t; + + hook->export_announce_event = ev_new_init(tab->rp, rt_import_announce_exports, hook); + + if (!hook->stale_set) + hook->stale_set = hook->stale_valid = hook->stale_pruning = hook->stale_pruned = 1; + + rt_set_import_state(hook, TIS_UP); + + hook->n = (node) {}; + add_tail(&tab->imports, &hook->n); - rte_update_unlock(); + RT_UNLOCK(t); } -/* Check rtable for best route to given net whether it would be exported do p */ -int -rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter) +void +rt_stop_import(struct rt_import_request *req, event *stopped) { - net *n = net_find(t, a); - rte *rt = n ? n->routes : NULL; + ASSERT_DIE(req->hook); + struct rt_import_hook *hook = req->hook; - if (!rte_is_valid(rt)) - return 0; + rtable_private *tab = RT_LOCK(hook->table); - rte_update_lock(); + rt_schedule_prune(tab); - /* Rest is stripped down export_filter() */ - int v = p->preexport ? 
p->preexport(p, &rt, rte_update_pool) : 0; - if (v == RIC_PROCESS) - { - rte_make_tmp_attrs(&rt, rte_update_pool, NULL); - v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT); - } + rt_set_import_state(hook, TIS_STOP); + hook->stopped = stopped; - /* Discard temporary rte */ - if (rt != n->routes) - rte_free(rt); + if (hook->stale_set < hook->stale_valid) + if (!--tab->rr_count) + rt_schedule_notify(tab); - rte_update_unlock(); + RT_UNLOCK(tab); +} - return v > 0; +void +rt_request_export(rtable *t, struct rt_export_request *req) +{ + RT_LOCK(t); + rtable_private *tab = RT_PRIV(t); + rt_lock_table(tab); + + pool *p = rp_new(tab->rp, tab->loop, "Export hook"); + struct rt_export_hook *hook = req->hook = mb_allocz(p, sizeof(struct rt_export_hook)); + hook->pool = p; + + hook->req = req; + hook->table = t; + + /* stats zeroed by mb_allocz */ + + bmap_init(&hook->seq_map, p, 1024); + + rt_set_export_state(hook, TES_HUNGRY); + + hook->n = (node) {}; + add_tail(&tab->exports, &hook->n); + + DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); + + hook->event = ev_new_init(p, rt_feed_channel, hook); + RT_UNLOCK(t); + + rt_send_export_event(hook); } +void +rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *)) +{ + ASSERT_DIE(req->hook); + struct rt_export_hook *hook = req->hook; + + RT_LOCK(hook->table); + rtable_private *tab = RT_PRIV(hook->table); + + /* Stop feeding */ + ev_postpone(hook->event); + + if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) == TES_FEEDING) + fit_get(&tab->fib, &hook->feed_fit); + + hook->event->hook = rt_export_stopped; + hook->stopped = stopped; + + rt_send_export_event(hook); + + RT_UNLOCK(hook->table); + + rt_set_export_state(hook, TES_STOP); +} /** * rt_refresh_begin - start a refresh cycle @@ -1630,21 +1836,48 @@ rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter) * routes to the routing table (by rte_update()). After that, all protocol * routes (more precisely routes with @c as @sender) not sent during the * refresh cycle but still in the table from the past are pruned. This is - * implemented by marking all related routes as stale by REF_STALE flag in - * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD - * flag in rt_refresh_end() and then removing such routes in the prune loop. - */ + * implemented by setting rte->stale_cycle to req->stale_set in rte_update() + * and then dropping all routes with old stale_cycle values in table prune loop. 
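 * (Editor's illustration, not part of the patch itself: leaving aside the
 *  TIS_FLUSHING case, the prune loop below keeps a stored route only while
 *  its stamp lies inside its sender's current window, roughly
 *
 *      keep = (e->rte.stale_cycle >= s->stale_valid)
 *          && (e->rte.stale_cycle <= s->stale_set);
 *
 *  routes stamped outside [stale_valid, stale_set] are withdrawn, which is
 *  what the removed REF_STALE/REF_DISCARD flags used to express.)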
*/ void -rt_refresh_begin(rtable *t, struct channel *c) +rt_refresh_begin(struct rt_import_request *req) { - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if (e->sender == c) - e->flags |= REF_STALE; - } - FIB_WALK_END; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); + + RT_LOCK(hook->table); + rtable_private *tab = RT_PRIV(hook->table); + + ASSERT_DIE(hook->stale_set == hook->stale_valid); + + /* If the pruning routine is too slow */ + if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid) + || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128)) + { + log(L_WARN "Route refresh flood in table %s", tab->name); + FIB_WALK(&tab->fib, net, n) + { + for (struct rte_storage *e = n->routes; e; e = e->next) + if (e->rte.sender == req->hook) + e->rte.stale_cycle = 0; + } + FIB_WALK_END; + hook->stale_set = 1; + hook->stale_valid = 0; + hook->stale_pruned = 0; + } + else if (!++hook->stale_set) + { + /* Let's reserve the stale_cycle zero value for always-invalid routes */ + hook->stale_set = 1; + hook->stale_valid = 0; + } + + tab->rr_count++; + + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set); + + RT_UNLOCK(tab); } /** @@ -1656,45 +1889,24 @@ rt_refresh_begin(rtable *t, struct channel *c) * hook. See rt_refresh_begin() for description of refresh cycles. */ void -rt_refresh_end(rtable *t, struct channel *c) +rt_refresh_end(struct rt_import_request *req) { - int prune = 0; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if ((e->sender == c) && (e->flags & REF_STALE)) - { - e->flags |= REF_DISCARD; - prune = 1; - } - } - FIB_WALK_END; + rtable_private *tab = RT_LOCK(hook->table); + hook->stale_valid++; + ASSERT_DIE(hook->stale_set == hook->stale_valid); - if (prune) - rt_schedule_prune(t); -} + rt_schedule_prune(tab); -void -rt_modify_stale(rtable *t, struct channel *c) -{ - int prune = 0; + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED)) - { - e->flags |= REF_MODIFY; - prune = 1; - } - } - FIB_WALK_END; + if (!--tab->rr_count) + rt_schedule_notify(tab); - if (prune) - rt_schedule_prune(t); + RT_UNLOCK(tab); } /** @@ -1704,14 +1916,11 @@ rt_modify_stale(rtable *t, struct channel *c) * This functions dumps contents of a &rte to debug output. */ void -rte_dump(rte *e) -{ - net *n = e->net; - debug("%-1N ", n->n.addr); - debug("PF=%02x pref=%d ", e->pflags, e->pref); - rta_dump(e->attrs); - if (e->attrs->src->proto->proto->dump_attrs) - e->attrs->src->proto->proto->dump_attrs(e); +rte_dump(struct rte_storage *e) +{ + debug("%-1N ", e->rte.net); + debug("PF=%02x ", e->rte.pflags); + rta_dump(e->rte.attrs); debug("\n"); } @@ -1722,20 +1931,22 @@ rte_dump(rte *e) * This function dumps contents of a given routing table to debug output. */ void -rt_dump(rtable *t) +rt_dump(rtable *tab) { - debug("Dump of routing table <%s>\n", t->name); + RT_LOCK(tab); + rtable_private *t = RT_PRIV(tab); + debug("Dump of routing table <%s>%s\n", t->name, t->delete ? 
" (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); #endif FIB_WALK(&t->fib, net, n) { - rte *e; - for(e=n->routes; e; e=e->next) + for(struct rte_storage *e=n->routes; e; e=e->next) rte_dump(e); } FIB_WALK_END; debug("\n"); + RT_UNLOCK(tab); } /** @@ -1753,73 +1964,105 @@ rt_dump_all(void) rt_dump(t); } -static inline void -rt_schedule_hcu(rtable *tab) +void +rt_dump_hooks(rtable *t) +{ + RT_LOCK(t); + rtable_private *tab = RT_PRIV(t); + debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->delete ? " (deleted)" : ""); + debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", + atomic_load(&tab->nhu_state), ev_active(tab->hcu_event), tab->use_count, tab->rt_count); + debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", + tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); + + struct rt_import_hook *ih; + WALK_LIST(ih, tab->imports) + { + ih->req->dump_req(ih->req); + debug(" Import hook %p requested by %p: pref=%u" + " last_state_change=%t import_state=%u stopped=%p\n", + ih, ih->req, ih->stats.pref, + ih->last_state_change, ih->import_state, ih->stopped); + } + + struct rt_export_hook *eh; + WALK_LIST(eh, tab->exports) + { + eh->req->dump_req(eh->req); + debug(" Export hook %p requested by %p:" + " refeed_pending=%u last_state_change=%t export_state=%u\n", + eh, eh->req, eh->refeed_pending, eh->last_state_change, atomic_load_explicit(&eh->export_state, memory_order_relaxed)); + } + debug("\n"); + RT_UNLOCK(t); +} + +void +rt_dump_hooks_all(void) { - if (tab->hcu_scheduled) - return; + rtable *t; + node *n; + + debug("Dump of all table hooks\n"); - tab->hcu_scheduled = 1; - ev_schedule(tab->rt_event); + WALK_LIST2(t, n, routing_tables, n) + rt_dump_hooks(t); } static inline void rt_schedule_nhu(rtable *tab) { - if (tab->nhu_state == NHU_CLEAN) - ev_schedule(tab->rt_event); + atomic_fetch_or_explicit(&tab->nhu_state, NHU_SCHEDULED, memory_order_acq_rel); + ev_send_loop(tab->loop, tab->nhu_event); /* state change: * NHU_CLEAN -> NHU_SCHEDULED * NHU_RUNNING -> NHU_DIRTY */ - tab->nhu_state |= NHU_SCHEDULED; } void -rt_schedule_prune(rtable *tab) +rt_schedule_prune(rtable_private *tab) { if (tab->prune_state == 0) - ev_schedule(tab->rt_event); + ev_send_loop(tab->loop, tab->prune_event); /* state change 0->1, 2->3 */ tab->prune_state |= 1; } - -static void -rt_event(void *ptr) +void +rt_export_used(rtable_private *tab) { - rtable *tab = ptr; - - rt_lock_table(tab); + if (config->table_debug) + log(L_TRACE "%s: Export cleanup requested", tab->name); - if (tab->hcu_scheduled) - rt_update_hostcache(tab); - - if (tab->nhu_state) - rt_next_hop_update(tab); - - if (tab->prune_state) - rt_prune_table(tab); - - rt_unlock_table(tab); + ev_send_loop(tab->loop, tab->ec_event); } - static inline btime -rt_settled_time(rtable *tab) +rt_settled_time(rtable_private *tab) { ASSUME(tab->base_settle_time != 0); - return MIN(tab->last_rt_change + tab->config->min_settle_time, - tab->base_settle_time + tab->config->max_settle_time); + btime min_settle_time = tab->rr_count ? tab->config->min_rr_settle_time : tab->config->min_settle_time; + btime max_settle_time = tab->rr_count ? 
tab->config->max_rr_settle_time : tab->config->max_settle_time; + + DBG("settled time computed from %t %t %t %t as %t / %t, now is %t\n", + tab->name, tab->last_rt_change, min_settle_time, + tab->base_settle_time, max_settle_time, + tab->last_rt_change + min_settle_time, + tab->base_settle_time + max_settle_time, current_time()); + + return MIN(tab->last_rt_change + min_settle_time, + tab->base_settle_time + max_settle_time); } static void rt_settle_timer(timer *t) { - rtable *tab = t->data; + rtable_private *tab = t->data; + ASSERT_DIE(birdloop_inside(tab->loop)); if (!tab->base_settle_time) return; @@ -1827,7 +2070,7 @@ rt_settle_timer(timer *t) btime settled_time = rt_settled_time(tab); if (current_time() < settled_time) { - tm_set(tab->settle_timer, settled_time); + tm_set_in(tab->settle_timer, settled_time, tab->loop); return; } @@ -1836,11 +2079,11 @@ rt_settle_timer(timer *t) struct rt_subscription *s; WALK_LIST(s, tab->subscribers) - s->hook(s); + ev_send(s->event->list, s->event); } static void -rt_kick_settle_timer(rtable *tab) +rt_kick_settle_timer(rtable_private *tab) { tab->base_settle_time = current_time(); @@ -1848,11 +2091,11 @@ rt_kick_settle_timer(rtable *tab) tab->settle_timer = tm_new_init(tab->rp, rt_settle_timer, tab, 0, 0); if (!tm_active(tab->settle_timer)) - tm_set(tab->settle_timer, rt_settled_time(tab)); + tm_set_in(tab->settle_timer, rt_settled_time(tab), tab->loop); } static inline void -rt_schedule_notify(rtable *tab) +rt_schedule_notify(rtable_private *tab) { if (EMPTY_LIST(tab->subscribers)) return; @@ -1864,33 +2107,40 @@ rt_schedule_notify(rtable *tab) } void -rt_subscribe(rtable *tab, struct rt_subscription *s) +rt_subscribe(rtable *t, struct rt_subscription *s) { - s->tab = tab; - rt_lock_table(tab); - add_tail(&tab->subscribers, &s->n); + s->tab = t; + RT_LOCKED(t, tab) + { + rt_lock_table(tab); + DBG("rt_subscribe(%s)\n", tab->name); + add_tail(&tab->subscribers, &s->n); + } } void rt_unsubscribe(struct rt_subscription *s) { - rem_node(&s->n); - rt_unlock_table(s->tab); + RT_LOCKED(s->tab, tab) + { + rem_node(&s->n); + if (EMPTY_LIST(tab->subscribers) && tm_active(tab->settle_timer)) + tm_stop(tab->settle_timer); + rt_unlock_table(tab); + } } static void rt_free(resource *_r) { - rtable *r = (rtable *) _r; + rtable_private *r = (rtable_private *) _r; DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); - - if (r->internal) - return; - - r->config->table = NULL; - rem_node(&r->n); + ASSERT_DIE(r->rt_count == 0); + ASSERT_DIE(!r->cork_active); + ASSERT_DIE(EMPTY_LIST(r->imports)); + ASSERT_DIE(EMPTY_LIST(r->exports)); if (r->hostcache) rt_free_hostcache(r); @@ -1907,14 +2157,14 @@ rt_free(resource *_r) static void rt_res_dump(resource *_r) { - rtable *r = (rtable *) _r; + rtable_private *r = RT_PRIV((rtable *) _r); debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n", r->name, net_label[r->addr_type], r->rt_count, r->use_count); } static struct resclass rt_class = { .name = "Routing table", - .size = sizeof(struct rtable), + .size = sizeof(rtable_private), .free = rt_free, .dump = rt_res_dump, .lookup = NULL, @@ -1928,11 +2178,16 @@ rt_setup(pool *pp, struct rtable_config *cf) void *nb = mb_alloc(pp, ns); ASSERT_DIE(ns - 1 == bsnprintf(nb, ns, "Routing table %s", cf->name)); - pool *p = rp_new(pp, nb); - mb_move(nb, p); + struct birdloop *l = birdloop_new(pp, DOMAIN_ORDER(rtable), nb); + pool *p = birdloop_pool(l); - rtable *t = ralloc(p, &rt_class); + birdloop_enter(l); + + rtable_private *t = ralloc(p, &rt_class); t->rp 
= p; + t->loop = l; + + t->rte_slab = sl_new(p, sizeof(struct rte_storage)); t->name = cf->name; t->config = cf; @@ -1940,19 +2195,35 @@ rt_setup(pool *pp, struct rtable_config *cf) fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); - if (!(t->internal = cf->internal)) - { - init_list(&t->channels); - hmap_init(&t->id_map, p, 1024); - hmap_set(&t->id_map, 0); + init_list(&t->imports); + init_list(&t->exports); - init_list(&t->subscribers); + hmap_init(&t->id_map, p, 1024); + hmap_set(&t->id_map, 0); - t->rt_event = ev_new_init(p, rt_event, t); - t->last_rt_change = t->gc_time = current_time(); - } + init_list(&t->pending_exports); + init_list(&t->subscribers); + + t->announce_event = ev_new_init(p, rt_announce_exports, t); + t->ec_event = ev_new_init(p, rt_export_cleanup, t); + t->prune_event = ev_new_init(p, rt_prune_table, t); + t->hcu_event = ev_new_init(p, rt_update_hostcache, t); + t->nhu_event = ev_new_init(p, rt_next_hop_update, t); + + t->nhu_event->cork = &rt_cork; + t->prune_event->cork = &rt_cork; + + t->last_rt_change = t->gc_time = current_time(); + t->next_export_seq = 1; - return t; + t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + + t->nhu_lp = lp_new_default(p); + + mb_move(nb, p); + birdloop_leave(l); + + return (rtable *) t; } /** @@ -1965,13 +2236,11 @@ void rt_init(void) { rta_init(); - rt_table_pool = rp_new(&root_pool, "Routing tables"); - rte_update_pool = lp_new_default(rt_table_pool); - rte_slab = sl_new(rt_table_pool, sizeof(rte)); + rt_table_pool = rp_new(&root_pool, &main_birdloop, "Routing tables"); init_list(&routing_tables); + ev_init_cork(&rt_cork, "Route Table Cork"); } - /** * rt_prune_table - prune a routing table * @@ -1987,12 +2256,15 @@ rt_init(void) * iteration. */ static void -rt_prune_table(rtable *tab) +rt_prune_table(void *data) { + rtable_private *tab = data; + ASSERT_DIE(birdloop_inside(tab->loop)); + struct fib_iterator *fit = &tab->prune_fit; int limit = 512; - struct channel *c; + struct rt_import_hook *ih; node *n, *x; DBG("Pruning route table %s\n", tab->name); @@ -2003,12 +2275,21 @@ rt_prune_table(rtable *tab) if (tab->prune_state == 0) return; + rt_lock_table(tab); + if (tab->prune_state == 1) { /* Mark channels to flush */ - WALK_LIST2(c, n, tab->channels, table_node) - if (c->channel_state == CS_FLUSHING) - c->flush_active = 1; + WALK_LIST2(ih, n, tab->imports, n) + if (ih->import_state == TIS_STOP) + rt_set_import_state(ih, TIS_FLUSHING); + else if ((ih->stale_valid != ih->stale_pruning) && (ih->stale_pruning == ih->stale_pruned)) + { + ih->stale_pruning = ih->stale_valid; + + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh begin [%u]", ih->req->name, ih->stale_pruning); + } FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; @@ -2017,43 +2298,31 @@ rt_prune_table(rtable *tab) again: FIB_ITERATE_START(&tab->fib, fit, net, n) { - rte *e; - rescan: - for (e=n->routes; e; e=e->next) + for (struct rte_storage *e=n->routes; e; e=e->next) { - if (e->sender->flush_active || (e->flags & REF_DISCARD)) - { - if (limit <= 0) - { - FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); - return; - } - - rte_discard(e); - limit--; + struct rt_import_hook *s = e->rte.sender; - goto rescan; - } - - if (e->flags & REF_MODIFY) + if ((s->import_state == TIS_FLUSHING) || + (e->rte.stale_cycle < s->stale_valid) || + (e->rte.stale_cycle > s->stale_set)) { if (limit <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); + ev_send_loop(tab->loop, tab->prune_event); + 
rt_unlock_table(tab); return; } - rte_modify(e); + rte_recalculate(tab, e->rte.sender, n, NULL, e->rte.src); limit--; goto rescan; } } - if (!n->routes) /* Orphaned FIB entry */ + if (!n->routes && !n->first) /* Orphaned FIB entry */ { FIB_ITERATE_PUT(fit); fib_delete(&tab->fib, n); @@ -2070,23 +2339,202 @@ again: tab->gc_time = current_time(); /* state change 2->0, 3->1 */ - tab->prune_state &= 1; - - if (tab->prune_state > 0) - ev_schedule(tab->rt_event); + if (tab->prune_state &= 1) + ev_send_loop(tab->loop, tab->prune_event); - /* FIXME: This should be handled in a better way */ - rt_prune_sources(); + uint flushed_channels = 0; /* Close flushed channels */ - WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node) - if (c->flush_active) + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_FLUSHING) + { + ih->flush_seq = tab->next_export_seq; + rt_set_import_state(ih, TIS_WAITING); + flushed_channels++; + } + else if (ih->stale_pruning != ih->stale_pruned) + { + ih->stale_pruned = ih->stale_pruning; + + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned); + } + + /* In some cases, we may want to directly proceed to export cleanup */ + if (EMPTY_LIST(tab->exports) && flushed_channels) + rt_export_cleanup(tab); + + rt_unlock_table(tab); +} + +static void +rt_export_cleanup(void *data) +{ + rtable_private *tab = data; + ASSERT_DIE(birdloop_inside(tab->loop)); + + u64 min_seq = ~((u64) 0); + struct rt_pending_export *last_export_to_free = NULL; + struct rt_pending_export *first_export = tab->first_export; + + struct rt_export_hook *eh; + node *n; + WALK_LIST2(eh, n, tab->exports, n) + { + switch (atomic_load_explicit(&eh->export_state, memory_order_acquire)) + { + case TES_DOWN: + case TES_HUNGRY: + continue; + + case TES_READY: + { + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (!last) + /* No last export means that the channel has exported nothing since last cleanup */ + goto done; + + else if (min_seq > last->seq) + { + min_seq = last->seq; + last_export_to_free = last; + } + continue; + } + + default: + /* It's only safe to cleanup when the export state is idle or regular. No feeding or stopping allowed. */ + goto done; + } + } + + tab->first_export = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL; + + if (config->table_debug) + log(L_TRACE "%s: Export cleanup, old first_export seq %lu, new %lu, min_seq %ld", + tab->name, + first_export ? first_export->seq : 0, + tab->first_export ? tab->first_export->seq : 0, + min_seq); + + WALK_LIST2(eh, n, tab->exports, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (last == last_export_to_free) + { + /* This may fail when the channel managed to export more inbetween. This is OK. */ + atomic_compare_exchange_strong_explicit( + &eh->last_export, &last, NULL, + memory_order_release, + memory_order_relaxed); + + DBG("store hook=%p last_export=NULL\n", eh); + } + } + + while (first_export && (first_export->seq <= min_seq)) + { + ASSERT_DIE(first_export->new || first_export->old); + + const net_addr *n = first_export->new ? 
+ first_export->new->rte.net : + first_export->old->rte.net; + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + + ASSERT_DIE(net->first == first_export); + + if (first_export == net->last) + /* The only export here */ + net->last = net->first = NULL; + else + /* First is now the next one */ + net->first = atomic_load_explicit(&first_export->next, memory_order_relaxed); + + /* For now, the old route may be finally freed */ + if (first_export->old) + { + rt_rte_trace_in(D_ROUTES, first_export->old->rte.sender->req, &first_export->old->rte, "freed"); + hmap_clear(&tab->id_map, first_export->old->rte.id); + rte_free(first_export->old, tab); + } + +#ifdef LOCAL_DEBUG + memset(first_export, 0xbd, sizeof(struct rt_pending_export)); +#endif + + struct rt_export_block *reb = HEAD(tab->pending_exports); + ASSERT_DIE(reb == PAGE_HEAD(first_export)); + + u32 pos = (first_export - &reb->export[0]); + u32 end = atomic_load_explicit(&reb->end, memory_order_relaxed); + ASSERT_DIE(pos < end); + + struct rt_pending_export *next = NULL; + + if (++pos < end) + next = &reb->export[pos]; + else + { + rem_node(&reb->n); + +#ifdef LOCAL_DEBUG + memset(reb, 0xbe, page_size); +#endif + + free_page(reb); + + if (EMPTY_LIST(tab->pending_exports)) { - c->flush_active = 0; - channel_set_state(c, CS_DOWN); + if (config->table_debug) + log(L_TRACE "%s: Resetting export seq", tab->name); + + node *n; + WALK_LIST2(eh, n, tab->exports, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + ASSERT_DIE(atomic_load_explicit(&eh->last_export, memory_order_acquire) == NULL); + bmap_reset(&eh->seq_map, 1024); + } + + tab->next_export_seq = 1; + } + else + { + reb = HEAD(tab->pending_exports); + next = &reb->export[0]; + } + } + + first_export = next; + } + +done:; + struct rt_import_hook *ih; node *x; + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_WAITING) + if (!first_export || (first_export->seq >= ih->flush_seq)) + { + ih->import_state = TIS_CLEARED; + ev_send(ih->req->list, ih->export_announce_event); } - return; + if (EMPTY_LIST(tab->pending_exports) && ev_active(tab->announce_event)) + ev_postpone(tab->announce_event); + + /* If reduced to at most one export block pending */ + if (tab->cork_active && + ((!tab->first_export) || (tab->first_export->seq + 128 > tab->next_export_seq))) + { + tab->cork_active = 0; + ev_uncork(&rt_cork); + if (config->table_debug) + log(L_TRACE "%s: cork released", tab->name); + } } void @@ -2120,7 +2568,7 @@ rta_next_hop_outdated(rta *a) } void -rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) +rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls, linpool *lp) { a->hostentry = he; a->dest = he->dest; @@ -2155,7 +2603,7 @@ no_nexthop: else { nhr = nhp; - nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + nhp = (nhp ? 
(nhp->next = lp_alloc(lp, NEXTHOP_MAX_SIZE)) : &(a->nh)); } memset(nhp, 0, NEXTHOP_MAX_SIZE); @@ -2210,8 +2658,8 @@ no_nexthop: } } -static inline rte * -rt_next_hop_update_rte(rtable *tab UNUSED, rte *old) +static inline struct rte_storage * +rt_next_hop_update_rte(rtable_private *tab, net *n, rte *old) { rta *a = alloca(RTA_MAX_SIZE); memcpy(a, old->attrs, rta_size(old->attrs)); @@ -2219,58 +2667,71 @@ rt_next_hop_update_rte(rtable *tab UNUSED, rte *old) mpls_label_stack mls = { .len = a->nh.labels_orig }; memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32)); - rta_apply_hostentry(a, old->attrs->hostentry, &mls); - a->aflags = 0; + rta_apply_hostentry(a, old->attrs->hostentry, &mls, tab->nhu_lp); + a->cached = 0; - rte *e = sl_alloc(rte_slab); - memcpy(e, old, sizeof(rte)); - e->attrs = rta_lookup(a); + rte e0 = *old; + e0.attrs = a; - return e; + return rte_store(&e0, n, tab); } static inline int -rt_next_hop_update_net(rtable *tab, net *n) +rt_next_hop_update_net(rtable_private *tab, net *n) { - rte **k, *e, *new, *old_best, **new_best; + struct rte_storage *new; int count = 0; - int free_old_best = 0; - old_best = n->routes; + struct rte_storage *old_best = n->routes; if (!old_best) return 0; - for (k = &n->routes; e = *k; k = &e->next) - if (rta_next_hop_outdated(e->attrs)) - { - new = rt_next_hop_update_rte(tab, e); - *k = new; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (rta_next_hop_outdated(e->rte.attrs)) + count++; + + if (!count) + return 0; + + struct rte_multiupdate { + struct rte_storage *old, *new; + } *updates = alloca(sizeof(struct rte_multiupdate) * count); - rte_trace_in(D_ROUTES, new->sender, new, "updated"); - rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL); + int pos = 0; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (rta_next_hop_outdated(e->rte.attrs)) + { + struct rte_storage *new = rt_next_hop_update_rte(tab, n, &e->rte); /* Call a pre-comparison hook */ /* Not really an efficient way to compute this */ - if (e->attrs->src->proto->rte_recalculate) - e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL); + if (e->rte.src->owner->rte_recalculate) + e->rte.src->owner->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); + + updates[pos++] = (struct rte_multiupdate) { + .old = e, + .new = new, + }; - if (e != old_best) - rte_free_quick(e); - else /* Freeing of the old best rte is postponed */ - free_old_best = 1; + /* Replace the route in the list */ + new->next = e->next; + *k = e = new; - e = new; - count++; + /* Get a new ID for the route */ + new->rte.lastmod = current_time(); + new->rte.id = hmap_first_zero(&tab->id_map); + hmap_set(&tab->id_map, new->rte.id); + + lp_flush(tab->nhu_lp); } - if (!count) - return 0; + ASSERT_DIE(pos == count); /* Find the new best route */ - new_best = NULL; - for (k = &n->routes; e = *k; k = &e->next) + struct rte_storage **new_best = NULL; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) { - if (!new_best || rte_better(e, *new_best)) + if (!new_best || rte_better(&e->rte, &(*new_best)->rte)) new_best = k; } @@ -2283,32 +2744,39 @@ rt_next_hop_update_net(rtable *tab, net *n) n->routes = new; } - /* Announce the new best route */ - if (new != old_best) - rte_trace_in(D_ROUTES, new->sender, new, "updated [best]"); - - /* Propagate changes */ - rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best); - - if (free_old_best) - rte_free_quick(old_best); + /* Announce the changes */ + for (int i=0; 
i<count; i++) + { + _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old); + const char *best_indicator[2][2] = { + { "autoupdated", "autoupdated [-best]" }, + { "autoupdated [+best]", "autoupdated [best]" } + }; + rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); + rte_announce(tab, n, updates[i].new, updates[i].old, new, old_best); + } return count; } static void -rt_next_hop_update(rtable *tab) +rt_next_hop_update(void *data) { + rtable_private *tab = data; + ASSERT_DIE(birdloop_inside(tab->loop)); + struct fib_iterator *fit = &tab->nhu_fit; int max_feed = 32; - if (tab->nhu_state == NHU_CLEAN) + if (atomic_load_explicit(&tab->nhu_state, memory_order_acquire) == NHU_CLEAN) return; - if (tab->nhu_state == NHU_SCHEDULED) + rt_lock_table(tab); + + if (atomic_load_explicit(&tab->nhu_state, memory_order_acquire) == NHU_SCHEDULED) { FIB_ITERATE_INIT(fit, &tab->fib); - tab->nhu_state = NHU_RUNNING; + ASSERT_DIE(atomic_exchange_explicit(&tab->nhu_state, NHU_RUNNING, memory_order_acq_rel) == NHU_SCHEDULED); } FIB_ITERATE_START(&tab->fib, fit, net, n) @@ -2316,7 +2784,8 @@ rt_next_hop_update(rtable *tab) if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); + ev_send_loop(tab->loop, tab->nhu_event); + rt_unlock_table(tab); return; } max_feed -= rt_next_hop_update_net(tab, n); @@ -2327,10 +2796,12 @@ rt_next_hop_update(rtable *tab) * NHU_DIRTY -> NHU_SCHEDULED * NHU_RUNNING -> NHU_CLEAN */ - tab->nhu_state &= 1; + if (atomic_fetch_and_explicit(&tab->nhu_state, NHU_SCHEDULED, memory_order_acq_rel) != NHU_RUNNING) + ev_send_loop(tab->loop, tab->nhu_event); - if (tab->nhu_state != NHU_CLEAN) - ev_schedule(tab->rt_event); + ev_send_loop(tab->loop, tab->announce_event); + + rt_unlock_table(tab); } @@ -2352,6 +2823,10 @@ rt_new_table(struct symbol *s, uint addr_type) c->gc_min_time = 5; c->min_settle_time = 1 S; c->max_settle_time = 20 S; + c->min_rr_settle_time = 30 S; + c->max_rr_settle_time = 90 S; + c->cork_limit = 4 * page_size / sizeof(struct rt_pending_export); + c->owner = new_config; add_tail(&new_config->tables, &c->n); @@ -2371,7 +2846,7 @@ rt_new_table(struct symbol *s, uint addr_type) * configuration. */ void -rt_lock_table(rtable *r) +rt_lock_table(rtable_private *r) { r->use_count++; } @@ -2385,16 +2860,11 @@ rt_lock_table(rtable *r) * for deletion by configuration changes. */ void -rt_unlock_table(rtable *r) +rt_unlock_table(rtable_private *r) { - if (!--r->use_count && r->deleted) - { - struct config *conf = r->deleted; - - /* Delete the routing table by freeing its pool */ - rt_shutdown(r); - config_del_obstacle(conf); - } + if (!--r->use_count && r->delete && + !r->prune_state && !atomic_load_explicit(&r->nhu_state, memory_order_acquire)) + birdloop_stop_self(r->loop, r->delete, r); } static struct rtable_config * @@ -2404,6 +2874,21 @@ rt_find_table_config(struct config *cf, char *name) return (sym && (sym->class == SYM_TABLE)) ? 
sym->table : NULL; } +static void +rt_done(void *data) +{ + RT_LOCKED((rtable *) data, t) + { + struct rtable_config *tc = t->config; + struct config *c = tc->owner; + + tc->table = NULL; + rem_node(&t->n); + + config_del_obstacle(c); + } +} + /** * rt_commit - commit new routing table configuration * @new: new configuration @@ -2426,14 +2911,15 @@ rt_commit(struct config *new, struct config *old) { WALK_LIST(o, old->tables) { - rtable *ot = o->table; - if (!ot->deleted) + RT_LOCK(o->table); + rtable_private *ot = RT_PRIV(o->table); + if (!ot->delete) { r = rt_find_table_config(new, o->name); if (r && (r->addr_type == o->addr_type) && !new->shutdown) { DBG("\t%s: same\n", o->name); - r->table = ot; + r->table = (rtable *) ot; ot->name = r->name; ot->config = r; if (o->sorted != r->sorted) @@ -2442,12 +2928,13 @@ rt_commit(struct config *new, struct config *old) else { DBG("\t%s: deleted\n", o->name); - ot->deleted = old; - config_add_obstacle(old); rt_lock_table(ot); + ot->delete = rt_done; + config_add_obstacle(old); rt_unlock_table(ot); } } + RT_UNLOCK(o->table); } } @@ -2461,19 +2948,6 @@ rt_commit(struct config *new, struct config *old) DBG("\tdone\n"); } -static inline void -do_feed_channel(struct channel *c, net *n, rte *e) -{ - rte_update_lock(); - if (c->ra_mode == RA_ACCEPTED) - rt_notify_accepted(c, n, NULL, NULL, c->refeeding); - else if (c->ra_mode == RA_MERGED) - rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding); - else /* RA_BASIC */ - rt_notify_basic(c, n, e, e, c->refeeding); - rte_update_unlock(); -} - /** * rt_feed_channel - advertise all routes to a channel * @c: channel to be fed @@ -2483,370 +2957,120 @@ do_feed_channel(struct channel *c, net *n, rte *e) * has something to do. (We avoid transferring all the routes in single pass in * order not to monopolize CPU time.) */ -int -rt_feed_channel(struct channel *c) +static void +rt_feed_channel(void *data) { + struct rt_export_hook *c = data; + struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - ASSERT(c->export_state == ES_FEEDING); - - if (!c->feed_active) - { - FIB_ITERATE_INIT(fit, &c->table->fib); - c->feed_active = 1; - } - - FIB_ITERATE_START(&c->table->fib, fit, net, n) - { - rte *e = n->routes; - if (max_feed <= 0) - { - FIB_ITERATE_PUT(fit); - return 0; - } - - if ((c->ra_mode == RA_OPTIMAL) || - (c->ra_mode == RA_ACCEPTED) || - (c->ra_mode == RA_MERGED)) - if (rte_is_valid(e)) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - do_feed_channel(c, n, e); - max_feed--; - } - - if (c->ra_mode == RA_ANY) - for(e = n->routes; e; e = e->next) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - if (!rte_is_valid(e)) - continue; - - do_feed_channel(c, n, e); - max_feed--; - } - } - FIB_ITERATE_END; - -done: - c->feed_active = 0; - return 1; -} - -/** - * rt_feed_baby_abort - abort protocol feeding - * @c: channel - * - * This function is called by the protocol code when the protocol stops or - * ceases to exist during the feeding. 
- */ -void -rt_feed_channel_abort(struct channel *c) -{ - if (c->feed_active) - { - /* Unlink the iterator */ - fit_get(&c->table->fib, &c->feed_fit); - c->feed_active = 0; - } -} - - -/* - * Import table - */ - -int -rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ - struct rtable *tab = c->in_table; - rte *old, **pos; - net *net; - - if (new) + rtable_private *tab; + if (c->export_state == TES_HUNGRY) { - net = net_get(tab, n); + rt_set_export_state(c, TES_FEEDING); + + tab = RT_LOCK(c->table); - if (!new->pref) - new->pref = c->preference; + struct rt_pending_export *rpe = rt_last_export(tab); + DBG("store hook=%p last_export=%p seq=%lu\n", c, rpe, rpe ? rpe->seq : 0); + atomic_store_explicit(&c->last_export, rpe, memory_order_relaxed); - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); + FIB_ITERATE_INIT(&c->feed_fit, &tab->fib); } else - { - net = net_find(tab, n); + tab = RT_LOCK(c->table); - if (!net) - goto drop_withdraw; - } + ASSERT_DIE(c->export_state == TES_FEEDING); - /* Find the old rte */ - for (pos = &net->routes; old = *pos; pos = &old->next) - if (old->attrs->src == src) +redo: + FIB_ITERATE_START(&tab->fib, fit, net, n) { - if (new && rte_same(old, new)) - { - /* Refresh the old rte, continue with update to main rtable */ - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) + if (max_feed <= 0) { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } - - goto drop_update; - } - - /* Move iterator if needed */ - if (old == c->reload_next_rte) - c->reload_next_rte = old->next; - - /* Remove the old rte */ - *pos = old->next; - rte_free_quick(old); - tab->rt_count--; - - break; - } - - if (!new) - { - if (!old) - goto drop_withdraw; - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } - - struct channel_limit *l = &c->rx_limit; - if (l->action && !old) - { - if (tab->rt_count >= l->limit) - channel_notify_limit(c, l, PLD_RX, tab->rt_count); - - if (l->state == PLS_BLOCKED) - { - /* Required by rte_trace_in() */ - new->net = net; - - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } - - /* Insert the new rte */ - rte *e = rte_do_cow(new); - e->flags |= REF_COW; - e->net = net; - e->sender = c; - e->lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; - -drop_update: - c->stats.imp_updates_received++; - c->stats.imp_updates_ignored++; - rte_free(new); - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 0; - -drop_withdraw: - c->stats.imp_withdraws_received++; - c->stats.imp_withdraws_ignored++; - return 0; -} - -int -rt_reload_channel(struct channel *c) -{ - struct rtable *tab = c->in_table; - struct fib_iterator *fit = &c->reload_fit; - int max_feed = 64; - - ASSERT(c->channel_state == CS_UP); + FIB_ITERATE_PUT(fit); + rt_send_export_event(c); - if (!c->reload_active) - { - FIB_ITERATE_INIT(fit, &tab->fib); - c->reload_active = 1; - } + RT_UNLOCK(c->table); + return; + } - do { - for (rte *e = c->reload_next_rte; e; e = e->next) - { - if (max_feed-- <= 0) + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) { - c->reload_next_rte = e; - debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed); - return 0; + RT_UNLOCK(c->table); + return; } - rte_update2(c, e->net->n.addr, rte_do_cow(e), e->attrs->src); - } - - c->reload_next_rte = NULL; - - FIB_ITERATE_START(&tab->fib, fit, net, n) - { - if (c->reload_next_rte = n->routes) + if (!n->routes || 
!rte_is_valid(&n->routes->rte)) + ; /* if no route, do nothing */ + else if (c->req->export_bulk) { - FIB_ITERATE_PUT_NEXT(fit, &tab->fib); - break; - } - } - FIB_ITERATE_END; - } - while (c->reload_next_rte); - - c->reload_active = 0; - return 1; -} - -void -rt_reload_channel_abort(struct channel *c) -{ - if (c->reload_active) - { - /* Unlink the iterator */ - fit_get(&c->in_table->fib, &c->reload_fit); - c->reload_next_rte = NULL; - c->reload_active = 0; - } -} - -void -rt_prune_sync(rtable *t, int all) -{ - struct fib_iterator fit; - - FIB_ITERATE_INIT(&fit, &t->fib); + uint count = rte_feed_count(n); + if (count) + { + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); -again: - FIB_ITERATE_START(&t->fib, &fit, net, n) - { - rte *e, **ee = &n->routes; + struct rt_pending_export *rpe_last, *rpe_first = n->first; + for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = rpe_next(rpe, NULL)) + rpe_last = rpe; - while (e = *ee) - { - if (all || (e->flags & (REF_STALE | REF_DISCARD))) - { - *ee = e->next; - rte_free_quick(e); - t->rt_count--; - } - else - ee = &e->next; - } + FIB_ITERATE_PUT_NEXT(fit, &tab->fib); + RT_UNLOCK(c->table); - if (all || !n->routes) - { - FIB_ITERATE_PUT(&fit); - fib_delete(&t->fib, n); - goto again; - } - } - FIB_ITERATE_END; -} + c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); + RT_LOCK(c->table); -/* - * Export table - */ + for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = rpe_next(rpe, NULL)) + { + rpe_mark_seen(c, rpe); + if (rpe == rpe_last) + break; + ASSERT_DIE(rpe->seq < rpe_last->seq); + } -int -rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed) -{ - struct rtable *tab = c->out_table; - struct rte_src *src; - rte *old, **pos; - net *net; + max_feed -= count; - if (new) - { - net = net_get(tab, n); - src = new->attrs->src; + goto redo; + } + } + else if (c->req->export_one) + { + struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; - rte_store_tmp_attrs(new, rte_update_pool, NULL); + struct rt_pending_export *rpe_last, *rpe_first = n->first; + for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = rpe_next(rpe, NULL)) + rpe_last = rpe; - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - } - else - { - net = net_find(tab, n); - src = old0->attrs->src; + FIB_ITERATE_PUT_NEXT(fit, &tab->fib); + RT_UNLOCK(c->table); - if (!net) - goto drop_withdraw; - } + c->req->export_one(c->req, n->n.addr, &rpe); - /* Find the old rte */ - for (pos = &net->routes; old = *pos; pos = &old->next) - if ((c->ra_mode != RA_ANY) || (old->attrs->src == src)) - { - if (new && rte_same(old, new)) - { - /* REF_STALE / REF_DISCARD not used in export table */ - /* - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) + RT_LOCK(c->table); + for (struct rt_pending_export *rpe = rpe_first; rpe; rpe = rpe_next(rpe, NULL)) { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; + rpe_mark_seen(c, rpe); + if (rpe == rpe_last) + break; + ASSERT_DIE(rpe->seq < rpe_last->seq); } - */ - goto drop_update; + max_feed--; + goto redo; } - - /* Remove the old rte */ - *pos = old->next; - rte_free_quick(old); - tab->rt_count--; - - break; + else + bug("Export request must always provide an export method"); } + FIB_ITERATE_END; - if (!new) - { - if (!old) - goto drop_withdraw; - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } - - /* Insert the new rte */ - rte *e = rte_do_cow(new); - e->flags |= REF_COW; - e->net = net; - 
e->sender = c; - e->lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; + c->event->hook = rt_export_hook; + rt_send_export_event(c); -drop_update: - return refeed; + RT_UNLOCK(c->table); -drop_withdraw: - return 0; + rt_set_export_state(c, TES_READY); } @@ -2953,7 +3177,7 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) } static void -rt_init_hostcache(rtable *tab) +rt_init_hostcache(rtable_private *tab) { struct hostcache *hc = mb_allocz(tab->rp, sizeof(struct hostcache)); init_list(&hc->hostentries); @@ -2969,7 +3193,7 @@ rt_init_hostcache(rtable *tab) } static void -rt_free_hostcache(rtable *tab) +rt_free_hostcache(rtable_private *tab) { struct hostcache *hc = tab->hostcache; @@ -2992,13 +3216,13 @@ rt_free_hostcache(rtable *tab) } static void -rt_notify_hostcache(rtable *tab, net *net) +rt_notify_hostcache(rtable_private *tab, net *net) { - if (tab->hcu_scheduled) + if (ev_active(tab->hcu_event)) return; if (trie_match_net(tab->hostcache->trie, net->n.addr)) - rt_schedule_hcu(tab); + ev_send_loop(tab->loop, tab->hcu_event); } static int @@ -3021,41 +3245,17 @@ rt_get_igp_metric(rte *rt) if (ea) return ea->u.data; - rta *a = rt->attrs; - -#ifdef CONFIG_OSPF - if ((a->source == RTS_OSPF) || - (a->source == RTS_OSPF_IA) || - (a->source == RTS_OSPF_EXT1)) - return rt->u.ospf.metric1; -#endif - -#ifdef CONFIG_RIP - if (a->source == RTS_RIP) - return rt->u.rip.metric; -#endif - -#ifdef CONFIG_BGP - if (a->source == RTS_BGP) - { - u64 metric = bgp_total_aigp_metric(rt); - return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN); - } -#endif - -#ifdef CONFIG_BABEL - if (a->source == RTS_BABEL) - return rt->u.babel.metric; -#endif - - if (a->source == RTS_DEVICE) + if (rt->attrs->source == RTS_DEVICE) return 0; + if (rt->src->owner->class->rte_igp_metric) + return rt->src->owner->class->rte_igp_metric(rt); + return IGP_METRIC_UNKNOWN; } static int -rt_update_hostentry(rtable *tab, struct hostentry *he) +rt_update_hostentry(rtable_private *tab, struct hostentry *he) { rta *old_src = he->src; int direct = 0; @@ -3072,11 +3272,12 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) net *n = net_route(tab, &he_addr); if (n) { - rte *e = n->routes; - rta *a = e->attrs; - pxlen = n->n.addr->pxlen; + struct rte_storage *e = n->routes; + rta *a = e->rte.attrs; + word pref = a->pref; - if (a->hostentry) + for (struct rte_storage *ee = n->routes; ee; ee = ee->next) + if ((ee->rte.attrs->pref >= pref) && ee->rte.attrs->hostentry) { /* Recursive route should not depend on another recursive route */ log(L_WARN "Next hop address %I resolvable through recursive route for %N", @@ -3084,6 +3285,8 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) goto done; } + pxlen = n->n.addr->pxlen; + if (a->dest == RTD_UNICAST) { for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) @@ -3104,7 +3307,7 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) he->src = rta_clone(a); he->dest = a->dest; he->nexthop_linkable = !direct; - he->igp_metric = rt_get_igp_metric(e); + he->igp_metric = rt_get_igp_metric(&e->rte); } done: @@ -3116,8 +3319,11 @@ done: } static void -rt_update_hostcache(rtable *tab) +rt_update_hostcache(void *data) { + rtable_private *tab = data; + ASSERT_DIE(birdloop_inside(tab->loop)); + struct hostcache *hc = tab->hostcache; struct hostentry *he; node *n, *x; @@ -3138,15 +3344,15 @@ rt_update_hostcache(rtable *tab) if (rt_update_hostentry(tab, he)) rt_schedule_nhu(he->tab); } - - tab->hcu_scheduled = 0; } struct hostentry * 
-rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) +rt_get_hostentry(rtable *t, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; + rtable_private *tab = RT_LOCK(t); + if (!tab->hostcache) rt_init_hostcache(tab); @@ -3154,10 +3360,13 @@ rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) struct hostcache *hc = tab->hostcache; for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next) if (ipa_equal(he->addr, a) && (he->tab == dep)) - return he; + goto done; he = hc_new_hostentry(hc, tab->rp, a, ipa_zero(ll) ? a : ll, dep, k); rt_update_hostentry(tab, he); + +done: + RT_UNLOCK(t); return he; } |
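Editor's note: the freeing rule used by rt_export_cleanup() above can be hard to see through the atomics and the page-sized rt_export_block bookkeeping, so here is a minimal standalone sketch of just that rule. All names in it (journal_entry, last_seen, N_READERS, ...) are illustrative only and not BIRD identifiers; the real code additionally aborts the whole cleanup when a ready export hook has a NULL last_export, since such a hook may still need every pending entry.

/* Simplified model: a journalled entry may be freed only after every
 * active reader has processed a sequence number at least as large. */

#include <stdio.h>
#include <stdint.h>

#define N_ENTRIES 8
#define N_READERS 3

struct journal_entry {
  uint64_t seq;   /* sequence number assigned when the update was journalled */
  int freed;      /* set once no reader can need this entry any more */
};

int main(void)
{
  struct journal_entry journal[N_ENTRIES];
  for (int i = 0; i < N_ENTRIES; i++)
    journal[i] = (struct journal_entry) { .seq = i + 1 };

  /* Last sequence number each reader has fully processed */
  uint64_t last_seen[N_READERS] = { 6, 4, 7 };

  /* Find the minimum over all readers; nothing newer may be freed */
  uint64_t min_seq = UINT64_MAX;
  for (int r = 0; r < N_READERS; r++)
    if (last_seen[r] < min_seq)
      min_seq = last_seen[r];

  /* Free (here: just mark) every journalled entry up to min_seq */
  for (int i = 0; i < N_ENTRIES; i++)
    if (journal[i].seq <= min_seq)
      journal[i].freed = 1;

  for (int i = 0; i < N_ENTRIES; i++)
    printf("seq %llu: %s\n", (unsigned long long) journal[i].seq,
           journal[i].freed ? "freed" : "still pending");

  return 0;
}

With the sample positions above, min_seq is 4, so entries 1-4 are released while 5-8 stay pending; in the table itself this corresponds to advancing first_export past the freed records and dropping whole export blocks once they become empty.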