Diffstat (limited to 'nest/rt-table.c')
-rw-r--r--  nest/rt-table.c | 3385
1 file changed, 1873 insertions, 1512 deletions
diff --git a/nest/rt-table.c b/nest/rt-table.c index a2fa5e77..3ade4237 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -91,7 +91,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/resource.h" @@ -110,22 +110,83 @@ #include "proto/bgp/bgp.h" #endif -pool *rt_table_pool; +#include <stdatomic.h> -static slab *rte_slab; -static linpool *rte_update_pool; +pool *rt_table_pool; list routing_tables; +list deleted_routing_tables; + +struct rt_cork rt_cork; + +/* Data structures for export journal */ +#define RT_PENDING_EXPORT_ITEMS (page_size - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export) + +struct rt_export_block { + node n; + _Atomic u32 end; + _Atomic _Bool not_last; + struct rt_pending_export export[]; +}; static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); static void rt_next_hop_update(rtable *tab); +static inline void rt_next_hop_resolve_rte(rte *r); +static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); static void rt_flowspec_notify(rtable *tab, net *net); static void rt_kick_prune_timer(rtable *tab); +static void rt_feed_by_fib(void *); +static void rt_feed_by_trie(void *); +static void rt_feed_equal(void *); +static void rt_feed_for(void *); +static uint rt_feed_net(struct rt_export_hook *c, net *n); +static void rt_check_cork_low(rtable *tab); +static void rt_check_cork_high(rtable *tab); +static void rt_cork_release_hook(void *); + +static inline void rt_export_used(struct rt_exporter *); +static void rt_export_cleanup(rtable *tab); + +static int rte_same(rte *x, rte *y); + +const char *rt_import_state_name_array[TIS_MAX] = { + [TIS_DOWN] = "DOWN", + [TIS_UP] = "UP", + [TIS_STOP] = "STOP", + [TIS_FLUSHING] = "FLUSHING", + [TIS_WAITING] = "WAITING", + [TIS_CLEARED] = "CLEARED", +}; + +const char *rt_export_state_name_array[TES_MAX] = { + [TES_DOWN] = "DOWN", + [TES_FEEDING] = "FEEDING", + [TES_READY] = "READY", + [TES_STOP] = "STOP" +}; + +const char *rt_import_state_name(u8 state) +{ + if (state >= TIS_MAX) + return "!! INVALID !!"; + else + return rt_import_state_name_array[state]; +} + +const char *rt_export_state_name(u8 state) +{ + if (state >= TES_MAX) + return "!! 
INVALID !!"; + else + return rt_export_state_name_array[state]; +} +static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old); +static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); static void net_init_with_trie(struct fib *f, void *N) @@ -395,7 +456,7 @@ net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) net_addr_roa4 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa4(roa, &roa0) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa4(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -422,7 +483,7 @@ net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) net_addr_roa4 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa4(roa, &n) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -455,7 +516,7 @@ net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) net_addr_roa6 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa6(roa, &roa0) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa6(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -482,7 +543,7 @@ net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) net_addr_roa6 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa6(roa, &n) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -541,253 +602,55 @@ net_roa_check(rtable *tab, const net_addr *n, u32 asn) * @net: network node * @src: route source * - * The rte_find() function returns a route for destination @net - * which is from route source @src. + * The rte_find() function returns a pointer to a route for destination @net + * which is from route source @src. List end pointer is returned if no route is found. */ -rte * +static struct rte_storage ** rte_find(net *net, struct rte_src *src) { - rte *e = net->routes; + struct rte_storage **e = &net->routes; - while (e && e->attrs->src != src) - e = e->next; - return e; -} - -/** - * rte_get_temp - get a temporary &rte - * @a: attributes to assign to the new route (a &rta; in case it's - * un-cached, rte_update() will create a cached copy automatically) - * - * Create a temporary &rte and bind it with the attributes @a. - * Also set route preference to the default preference set for - * the protocol. - */ -rte * -rte_get_temp(rta *a) -{ - rte *e = sl_alloc(rte_slab); + while ((*e) && (*e)->rte.src != src) + e = &(*e)->next; - e->attrs = a; - e->id = 0; - e->flags = 0; - e->pref = 0; return e; } -rte * -rte_do_cow(rte *r) -{ - rte *e = sl_alloc(rte_slab); - - memcpy(e, r, sizeof(rte)); - e->attrs = rta_clone(r->attrs); - e->flags = 0; - return e; -} - -/** - * rte_cow_rta - get a private writable copy of &rte with writable &rta - * @r: a route entry to be copied - * @lp: a linpool from which to allocate &rta - * - * rte_cow_rta() takes a &rte and prepares it and associated &rta for - * modification. 
There are three possibilities: First, both &rte and &rta are - * private copies, in that case they are returned unchanged. Second, &rte is - * private copy, but &rta is cached, in that case &rta is duplicated using - * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case - * both structures are duplicated by rte_do_cow() and rta_do_cow(). - * - * Note that in the second case, cached &rta loses one reference, while private - * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs, - * nexthops, ...) with it. To work properly, original shared &rta should have - * another reference during the life of created private copy. - * - * Result: a pointer to the new writable &rte with writable &rta. - */ -rte * -rte_cow_rta(rte *r, linpool *lp) -{ - if (!rta_is_cached(r->attrs)) - return r; - - r = rte_cow(r); - rta *a = rta_do_cow(r->attrs, lp); - rta_free(r->attrs); - r->attrs = a; - return r; -} - -/** - * rte_init_tmp_attrs - initialize temporary ea_list for route - * @r: route entry to be modified - * @lp: linpool from which to allocate attributes - * @max: maximum number of added temporary attribus - * - * This function is supposed to be called from make_tmp_attrs() and - * store_tmp_attrs() hooks before rte_make_tmp_attr() / rte_store_tmp_attr() - * functions. It allocates &ea_list with length for @max items for temporary - * attributes and puts it on top of eattrs stack. - */ -void -rte_init_tmp_attrs(rte *r, linpool *lp, uint max) +struct rte_storage * +rte_store(const rte *r, net *net, rtable *tab) { - struct ea_list *e = lp_alloc(lp, sizeof(struct ea_list) + max * sizeof(eattr)); + struct rte_storage *e = sl_alloc(tab->rte_slab); - e->next = r->attrs->eattrs; - e->flags = EALF_SORTED | EALF_TEMP; - e->count = 0; + e->rte = *r; + e->rte.net = net->n.addr; - r->attrs->eattrs = e; -} + rt_lock_source(e->rte.src); -/** - * rte_make_tmp_attr - make temporary eattr from private route fields - * @r: route entry to be modified - * @id: attribute ID - * @type: attribute type - * @val: attribute value (u32 or adata ptr) - * - * This function is supposed to be called from make_tmp_attrs() hook for - * each temporary attribute, after temporary &ea_list was initialized by - * rte_init_tmp_attrs(). It checks whether temporary attribute is supposed to - * be defined (based on route pflags) and if so then it fills &eattr field in - * preallocated temporary &ea_list on top of route @r eattrs stack. - * - * Note that it may require free &eattr in temporary &ea_list, so it must not be - * called more times than @max argument of rte_init_tmp_attrs(). - */ -void -rte_make_tmp_attr(rte *r, uint id, uint type, uintptr_t val) -{ - if (r->pflags & EA_ID_FLAG(id)) - { - ea_list *e = r->attrs->eattrs; - eattr *a = &e->attrs[e->count++]; - a->id = id; - a->type = type; - a->flags = 0; + if (ea_is_cached(e->rte.attrs)) + e->rte.attrs = rta_clone(e->rte.attrs); + else + e->rte.attrs = rta_lookup(e->rte.attrs, 1); - if (type & EAF_EMBEDDED) - a->u.data = (u32) val; - else - a->u.ptr = (struct adata *) val; - } + return e; } /** - * rte_store_tmp_attr - store temporary eattr to private route fields - * @r: route entry to be modified - * @id: attribute ID - * - * This function is supposed to be called from store_tmp_attrs() hook for - * each temporary attribute, after temporary &ea_list was initialized by - * rte_init_tmp_attrs(). 
It checks whether temporary attribute is defined in - * route @r eattrs stack, updates route pflags accordingly, undefines it by - * filling &eattr field in preallocated temporary &ea_list on top of the eattrs - * stack, and returns the value. Caller is supposed to store it in the - * appropriate private field. + * rte_free - delete a &rte + * @e: &struct rte_storage to be deleted + * @tab: the table which the rte belongs to * - * Note that it may require free &eattr in temporary &ea_list, so it must not be - * called more times than @max argument of rte_init_tmp_attrs() + * rte_free() deletes the given &rte from the routing table it's linked to. */ -uintptr_t -rte_store_tmp_attr(rte *r, uint id) -{ - ea_list *e = r->attrs->eattrs; - eattr *a = ea_find(e->next, id); - - if (a) - { - e->attrs[e->count++] = (struct eattr) { .id = id, .type = EAF_TYPE_UNDEF }; - r->pflags |= EA_ID_FLAG(id); - return (a->type & EAF_EMBEDDED) ? a->u.data : (uintptr_t) a->u.ptr; - } - else - { - r->pflags &= ~EA_ID_FLAG(id); - return 0; - } -} -/** - * rte_make_tmp_attrs - prepare route by adding all relevant temporary route attributes - * @r: route entry to be modified (may be replaced if COW) - * @lp: linpool from which to allocate attributes - * @old_attrs: temporary ref to old &rta (may be NULL) - * - * This function expands privately stored protocol-dependent route attributes - * to a uniform &eattr / &ea_list representation. It is essentially a wrapper - * around protocol make_tmp_attrs() hook, which does some additional work like - * ensuring that route @r is writable. - * - * The route @r may be read-only (with %REF_COW flag), in that case rw copy is - * obtained by rte_cow() and @r is replaced. If @rte is originally rw, it may be - * directly modified (and it is never copied). - * - * If the @old_attrs ptr is supplied, the function obtains another reference of - * old cached &rta, that is necessary in some cases (see rte_cow_rta() for - * details). It is freed by rte_store_tmp_attrs(), or manually by rta_free(). - * - * Generally, if caller ensures that @r is read-only (e.g. in route export) then - * it may ignore @old_attrs (and set it to NULL), but must handle replacement of - * @r. If caller ensures that @r is writable (e.g. in route import) then it may - * ignore replacement of @r, but it must handle @old_attrs. - */ void -rte_make_tmp_attrs(rte **r, linpool *lp, rta **old_attrs) -{ - void (*make_tmp_attrs)(rte *r, linpool *lp); - make_tmp_attrs = (*r)->attrs->src->proto->make_tmp_attrs; - - if (!make_tmp_attrs) - return; - - /* We may need to keep ref to old attributes, will be freed in rte_store_tmp_attrs() */ - if (old_attrs) - *old_attrs = rta_is_cached((*r)->attrs) ? rta_clone((*r)->attrs) : NULL; - - *r = rte_cow_rta(*r, lp); - make_tmp_attrs(*r, lp); -} - -/** - * rte_store_tmp_attrs - store temporary route attributes back to private route fields - * @r: route entry to be modified - * @lp: linpool from which to allocate attributes - * @old_attrs: temporary ref to old &rta - * - * This function stores temporary route attributes that were expanded by - * rte_make_tmp_attrs() back to private route fields and also undefines them. - * It is essentially a wrapper around protocol store_tmp_attrs() hook, which - * does some additional work like shortcut if there is no change and cleanup - * of @old_attrs reference obtained by rte_make_tmp_attrs(). 
- */ -static void -rte_store_tmp_attrs(rte *r, linpool *lp, rta *old_attrs) +rte_free(struct rte_storage *e) { - void (*store_tmp_attrs)(rte *rt, linpool *lp); - store_tmp_attrs = r->attrs->src->proto->store_tmp_attrs; - - if (!store_tmp_attrs) - return; - - ASSERT(!rta_is_cached(r->attrs)); - - /* If there is no new ea_list, we just skip the temporary ea_list */ - ea_list *ea = r->attrs->eattrs; - if (ea && (ea->flags & EALF_TEMP)) - r->attrs->eattrs = ea->next; - else - store_tmp_attrs(r, lp); - - /* Free ref we got in rte_make_tmp_attrs(), have to do rta_lookup() first */ - r->attrs = rta_lookup(r->attrs); - rta_free(old_attrs); + rt_unlock_source(e->rte.src); + rta_free(e->rte.attrs); + sl_free(e); } - static int /* Actually better or at least as good as */ rte_better(rte *new, rte *old) { @@ -798,20 +661,23 @@ rte_better(rte *new, rte *old) if (!rte_is_valid(new)) return 0; - if (new->pref > old->pref) + u32 np = rt_get_preference(new); + u32 op = rt_get_preference(old); + + if (np > op) return 1; - if (new->pref < old->pref) + if (np < op) return 0; - if (new->attrs->src->proto->proto != old->attrs->src->proto->proto) + if (new->src->owner->class != old->src->owner->class) { /* * If the user has configured protocol preferences, so that two different protocols * have the same preference, try to break the tie by comparing addresses. Not too * useful, but keeps the ordering of routes unambiguous. */ - return new->attrs->src->proto->proto > old->attrs->src->proto->proto; + return new->src->owner->class > old->src->owner->class; } - if (better = new->attrs->src->proto->rte_better) + if (better = new->src->owner->class->rte_better) return better(new, old); return 0; } @@ -824,180 +690,197 @@ rte_mergable(rte *pri, rte *sec) if (!rte_is_valid(pri) || !rte_is_valid(sec)) return 0; - if (pri->pref != sec->pref) + if (rt_get_preference(pri) != rt_get_preference(sec)) return 0; - if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto) + if (pri->src->owner->class != sec->src->owner->class) return 0; - if (mergable = pri->attrs->src->proto->rte_mergable) + if (mergable = pri->src->owner->class->rte_mergable) return mergable(pri, sec); return 0; } static void -rte_trace(struct channel *c, rte *e, int dir, char *msg) +rte_trace(const char *name, const rte *e, int dir, const char *msg) { - log(L_TRACE "%s.%s %c %s %N %s", - c->proto->name, c->name ?: "?", dir, msg, e->net->n.addr, - rta_dest_name(e->attrs->dest)); + log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s", + name, dir, msg, e->net, + e->src->private_id, e->src->global_id, e->stale_cycle, e->id, + rta_dest_name(rte_dest(e))); } static inline void -rte_trace_in(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_in(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '>', msg); + rte_trace(c->in_req.name, e, '>', msg); } static inline void -rte_trace_out(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_out(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '<', msg); + rte_trace(c->out_req.name, e, '<', msg); +} + +static inline void +rt_rte_trace_in(uint flag, struct rt_import_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '>', msg); +} + +#if 0 +// seems to be unused at all +static inline void +rt_rte_trace_out(uint flag, struct rt_export_request *req, const rte *e, const char 
*msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '<', msg); +} +#endif + +static uint +rte_feed_count(net *n) +{ + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + count++; + + return count; +} + +static void +rte_feed_obtain(net *n, struct rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + { + ASSERT_DIE(i < count); + feed[i++] = &e->rte; + } + + ASSERT_DIE(i == count); } static rte * -export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent) +export_filter(struct channel *c, rte *rt, int silent) { struct proto *p = c->proto; const struct filter *filter = c->out_filter; - struct proto_stats *stats = &c->stats; - rte *rt; - int v; + struct channel_export_stats *stats = &c->export_stats; - rt = rt0; - *rt_free = NULL; + /* Do nothing if we have already rejected the route */ + if (silent && bmap_test(&c->export_reject_map, rt->id)) + goto reject_noset; - v = p->preexport ? p->preexport(c, &rt, pool) : 0; + int v = p->preexport ? p->preexport(c, rt) : 0; if (v < 0) { if (silent) - goto reject; + goto reject_noset; - stats->exp_updates_rejected++; + stats->updates_rejected++; if (v == RIC_REJECT) - rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); - goto reject; + channel_rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); + goto reject_noset; + } if (v > 0) { if (!silent) - rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); + channel_rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); goto accept; } - rte_make_tmp_attrs(&rt, pool, NULL); - v = filter && ((filter == FILTER_REJECT) || - (f_run(filter, &rt, pool, + (f_run(filter, rt, (silent ? FF_SILENT : 0)) > F_ACCEPT)); if (v) { if (silent) goto reject; - stats->exp_updates_filtered++; - rte_trace_out(D_FILTERS, c, rt, "filtered out"); + stats->updates_filtered++; + channel_rte_trace_out(D_FILTERS, c, rt, "filtered out"); goto reject; } -#ifdef CONFIG_PIPE - /* Pipes need rte with stored tmpattrs, remaining protocols need expanded tmpattrs */ - if (p->proto == &proto_pipe) - rte_store_tmp_attrs(rt, pool, NULL); -#endif - accept: - if (rt != rt0) - *rt_free = rt; + /* We have accepted the route */ + bmap_clear(&c->export_reject_map, rt->id); return rt; reject: + /* We have rejected the route by filter */ + bmap_set(&c->export_reject_map, rt->id); + +reject_noset: /* Discard temporary rte */ - if (rt != rt0) - rte_free(rt); return NULL; } -static inline rte * -export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent) -{ - return export_filter_(c, rt0, rt_free, rte_update_pool, silent); -} - static void -do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed) +do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) { struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; + struct channel_export_stats *stats = &c->export_stats; - if (refeed && new) + if (c->refeeding && new) c->refeed_count++; - /* Apply export limit */ - struct channel_limit *l = &c->out_limit; - if (l->action && !old && new) - { - if (stats->exp_routes >= l->limit) - channel_notify_limit(c, l, PLD_OUT, stats->exp_routes); - - if (l->state == PLS_BLOCKED) + if (!old && new) + if (CHANNEL_LIMIT_PUSH(c, OUT)) { - stats->exp_updates_rejected++; - rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); + stats->updates_rejected++; + channel_rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); return; } - } - /* Apply export table */ - if (c->out_table && 
!rte_update_out(c, net->n.addr, new, old, refeed)) - return; + if (!new && old) + CHANNEL_LIMIT_POP(c, OUT); if (new) - stats->exp_updates_accepted++; + stats->updates_accepted++; else - stats->exp_withdraws_accepted++; + stats->withdraws_accepted++; if (old) - { bmap_clear(&c->export_map, old->id); - stats->exp_routes--; - } if (new) - { bmap_set(&c->export_map, new->id); - stats->exp_routes++; - } if (p->debug & D_ROUTES) { if (new && old) - rte_trace_out(D_ROUTES, c, new, "replaced"); + channel_rte_trace_out(D_ROUTES, c, new, "replaced"); else if (new) - rte_trace_out(D_ROUTES, c, new, "added"); + channel_rte_trace_out(D_ROUTES, c, new, "added"); else if (old) - rte_trace_out(D_ROUTES, c, old, "removed"); + channel_rte_trace_out(D_ROUTES, c, old, "removed"); } p->rt_notify(p, c, net, new, old); } static void -rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed) +rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old) { - // struct proto *p = c->proto; - rte *new_free = NULL; - - if (new) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + if (new && old && rte_same(new, old)) + { + if ((new->id != old->id) && bmap_test(&c->export_map, old->id)) + { + bmap_set(&c->export_map, new->id); + bmap_clear(&c->export_map, old->id); + } + return; + } if (new) - new = export_filter(c, new, &new_free, 0); + new = export_filter(c, new, 0); if (old && !bmap_test(&c->export_map, old->id)) old = NULL; @@ -1005,197 +888,351 @@ rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed) if (!new && !old) return; - do_rt_notify(c, net, new, old, refeed); - - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); + do_rt_notify(c, net, new, old); } static void -rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed) +channel_rpe_mark_seen(struct rt_export_request *req, struct rt_pending_export *rpe) { - // struct proto *p = c->proto; - rte *new_best = NULL; - rte *old_best = NULL; - rte *new_free = NULL; - int new_first = 0; - - /* - * We assume that there are no changes in net route order except (added) - * new_changed and (removed) old_changed. Therefore, the function is not - * compatible with deterministic_med (where nontrivial reordering can happen - * as a result of a route change) and with recomputation of recursive routes - * due to next hop update (where many routes can be changed in one step). - * - * Note that we need this assumption just for optimizations, we could just - * run full new_best recomputation otherwise. 
- * - * There are three cases: - * feed or old_best is old_changed -> we need to recompute new_best - * old_best is before new_changed -> new_best is old_best, ignore - * old_best is after new_changed -> try new_changed, otherwise old_best - */ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - if (net->routes) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rpe_mark_seen(req->hook, rpe); + if (rpe->old) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); +} - /* Find old_best - either old_changed, or route for net->routes */ - if (old_changed && bmap_test(&c->export_map, old_changed->id)) - old_best = old_changed; - else +void +rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + rte nb0, *new_best = NULL; + const rte *old_best = NULL; + + for (uint i = 0; i < count; i++) { - for (rte *r = net->routes; rte_is_valid(r); r = r->next) + if (!rte_is_valid(feed[i])) + continue; + + /* Has been already rejected, won't bother with it */ + if (!c->refeeding && bmap_test(&c->export_reject_map, feed[i]->id)) + continue; + + /* Previously exported */ + if (!old_best && bmap_test(&c->export_map, feed[i]->id)) { - if (bmap_test(&c->export_map, r->id)) + /* is still best */ + if (!new_best) { - old_best = r; - break; + DBG("rt_notify_accepted: idempotent\n"); + goto done; } - /* Note if new_changed found before old_best */ - if (r == new_changed) - new_first = 1; + /* is superseded */ + old_best = feed[i]; + break; } - } - /* Find new_best */ - if ((new_changed == old_changed) || (old_best == old_changed)) - { - /* Feed or old_best changed -> find first accepted by filters */ - for (rte *r = net->routes; rte_is_valid(r); r = r->next) - if (new_best = export_filter(c, r, &new_free, 0)) - break; + /* Have no new best route yet */ + if (!new_best) + { + /* Try this route not seen before */ + nb0 = *feed[i]; + new_best = export_filter(c, &nb0, 0); + DBG("rt_notify_accepted: checking route id %u: %s\n", feed[i]->id, new_best ? 
"ok" : "no"); + } } - else + +done: + /* Check obsolete routes for previously exported */ + while (rpe) { - /* Other cases -> either new_changed, or old_best (and nothing changed) */ - if (new_first && (new_changed = export_filter(c, new_changed, &new_free, 0))) - new_best = new_changed; - else - return; + channel_rpe_mark_seen(req, rpe); + if (rpe->old) + { + if (bmap_test(&c->export_map, rpe->old->rte.id)) + { + ASSERT_DIE(old_best == NULL); + old_best = &rpe->old->rte; + } + } + rpe = rpe_next(rpe, NULL); } - if (!new_best && !old_best) - return; - - do_rt_notify(c, net, new_best, old_best, refeed); - - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); -} - - -static struct nexthop * -nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) -{ - return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); + /* Nothing to export */ + if (new_best || old_best) + do_rt_notify(c, n, new_best, old_best); + else + DBG("rt_notify_accepted: nothing to export\n"); } rte * -rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent) +rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool, int silent) { - // struct proto *p = c->proto; - struct nexthop *nhs = NULL; - rte *best0, *best, *rt0, *rt, *tmp; + _Thread_local static rte rloc; - best0 = net->routes; - *rt_free = NULL; + // struct proto *p = c->proto; + struct nexthop_adata *nhs = NULL; + rte *best0 = feed[0]; + rte *best = NULL; if (!rte_is_valid(best0)) return NULL; - best = export_filter_(c, best0, rt_free, pool, silent); + /* Already rejected, no need to re-run the filter */ + if (!c->refeeding && bmap_test(&c->export_reject_map, best0->id)) + return NULL; - if (!best || !rte_is_reachable(best)) + rloc = *best0; + best = export_filter(c, &rloc, silent); + + if (!best) + /* Best route doesn't pass the filter */ + return NULL; + + if (!rte_is_reachable(best)) + /* Unreachable routes can't be merged */ return best; - for (rt0 = best0->next; rt0; rt0 = rt0->next) + for (uint i = 1; i < count; i++) { - if (!rte_mergable(best0, rt0)) + if (!rte_mergable(best0, feed[i])) continue; - rt = export_filter_(c, rt0, &tmp, pool, 1); + rte tmp0 = *feed[i]; + rte *tmp = export_filter(c, &tmp0, 1); - if (!rt) + if (!tmp || !rte_is_reachable(tmp)) continue; - if (rte_is_reachable(rt)) - nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(tmp->attrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); - if (tmp) - rte_free(tmp); + if (nhs) + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + else + nhs = (struct nexthop_adata *) nhea->u.ptr; } if (nhs) { - nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(best->attrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); - if (nhs->next) - { - best = rte_cow_rta(best, pool); - nexthop_link(best->attrs, nhs); - } - } + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); - if (best != best0) - *rt_free = best; + ea_set_attr(&best->attrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhs->ad)); + } return best; } - -static void -rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed, - rte *new_best, rte *old_best, int refeed) +void +rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) { - // struct proto *p = c->proto; - rte *new_free = NULL; + struct channel *c = SKIP_BACK(struct channel, out_req, req); - /* We 
assume that all rte arguments are either NULL or rte_is_valid() */ - - /* This check should be done by the caller */ - if (!new_best && !old_best) - return; + // struct proto *p = c->proto; +#if 0 /* TODO: Find whether this check is possible when processing multiple changes at once. */ /* Check whether the change is relevant to the merged route */ if ((new_best == old_best) && (new_changed != old_changed) && !rte_mergable(new_best, new_changed) && !rte_mergable(old_best, old_changed)) return; +#endif - if (new_best) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rte *old_best = NULL; + /* Find old best route */ + for (uint i = 0; i < count; i++) + if (bmap_test(&c->export_map, feed[i]->id)) + { + old_best = feed[i]; + break; + } + + /* Check obsolete routes for previously exported */ + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + if (rpe->old) + { + if (bmap_test(&c->export_map, rpe->old->rte.id)) + { + ASSERT_DIE(old_best == NULL); + old_best = &rpe->old->rte; + } + } + rpe = rpe_next(rpe, NULL); + } /* Prepare new merged route */ - if (new_best) - new_best = rt_export_merged(c, net, &new_free, rte_update_pool, 0); + rte *new_merged = count ? rt_export_merged(c, feed, count, tmp_linpool, 0) : NULL; + + if (new_merged || old_best) + do_rt_notify(c, n, new_merged, old_best); +} + +void +rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte *o = RTE_VALID_OR_NULL(rpe->old_best); + struct rte_storage *new_best = rpe->new_best; + + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + new_best = rpe->new_best; + rpe = rpe_next(rpe, NULL); + } - /* Check old merged route */ - if (old_best && !bmap_test(&c->export_map, old_best->id)) - old_best = NULL; + rte n0 = RTE_COPY_VALID(new_best); + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); +} + +void +rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - if (!new_best && !old_best) + rte *n = RTE_VALID_OR_NULL(rpe->new); + rte *o = RTE_VALID_OR_NULL(rpe->old); + + if (!n && !o) + { + channel_rpe_mark_seen(req, rpe); return; + } - do_rt_notify(c, net, new_best, old_best, refeed); + struct rte_src *src = n ? n->src : o->src; + struct rte_storage *new_latest = rpe->new; - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + new_latest = rpe->new; + rpe = rpe_next(rpe, src); + } + + rte n0 = RTE_COPY_VALID(new_latest); + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); +} + +void +rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + for (uint i=0; i<count; i++) + if (rte_is_valid(feed[i])) + { + rte n0 = *feed[i]; + rt_notify_basic(c, net, &n0, NULL); + } +} + +void +rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe) +{ + bmap_set(&hook->seq_map, rpe->seq); +} + +struct rt_pending_export * +rpe_next(struct rt_pending_export *rpe, struct rte_src *src) +{ + struct rt_pending_export *next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + if (!next) + return NULL; + + if (!src) + return next; + + while (rpe = next) + if (src == (rpe->new ? 
rpe->new->rte.src : rpe->old->rte.src)) + return rpe; + else + next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + return NULL; } +static struct rt_pending_export * rt_next_export_fast(struct rt_pending_export *last); +static void +rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) +{ + if (bmap_test(&hook->seq_map, rpe->seq)) + goto ignore; /* Seen already */ + + const net_addr *n = rpe->new_best ? rpe->new_best->rte.net : rpe->old_best->rte.net; + + switch (hook->req->addr_mode) + { + case TE_ADDR_NONE: + break; + + case TE_ADDR_IN: + if (!net_in_netX(n, hook->req->addr)) + goto ignore; + break; + + case TE_ADDR_EQUAL: + if (!net_equal(n, hook->req->addr)) + goto ignore; + break; + + case TE_ADDR_FOR: + bug("Continuos export of best prefix match not implemented yet."); + + default: + bug("Strange table export address mode: %d", hook->req->addr_mode); + } + + if (rpe->new) + hook->stats.updates_received++; + else + hook->stats.withdraws_received++; + + if (hook->req->export_one) + hook->req->export_one(hook->req, n, rpe); + else if (hook->req->export_bulk) + { + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + uint count = rte_feed_count(net); + rte **feed = NULL; + if (count) + { + feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + } + hook->req->export_bulk(hook->req, n, rpe, feed, count); + } + else + bug("Export request must always provide an export method"); + +ignore: + /* Get the next export if exists */ + hook->rpe_next = rt_next_export_fast(rpe); + + /* The last block may be available to free */ + if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe)) + CALL(hook->table->used, hook->table); + + /* Releasing this export for cleanup routine */ + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq); + atomic_store_explicit(&hook->last_export, rpe, memory_order_release); +} /** * rte_announce - announce a routing table change * @tab: table the route has been added to - * @type: type of route announcement (RA_UNDEF or RA_ANY) * @net: network in question * @new: the new or changed route * @old: the previous route replaced by the new one @@ -1211,13 +1248,6 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * and @new_best and @old_best describes best routes. Other routes are not * affected, but in sorted table the order of other routes might change. * - * Second, There is a bulk change of multiple routes in @net, with shared best - * route selection. In such case separate route changes are described using - * @type of %RA_ANY, with @new and @old specifying the changed route, while - * @new_best and @old_best are NULL. After that, another notification is done - * where @new_best and @old_best are filled (may be the same), but @new and @old - * are NULL. - * * The function announces the change to all associated channels. For each * channel, an appropriate preprocessing is done according to channel &ra_mode. * For example, %RA_OPTIMAL channels receive just changes of best routes. @@ -1232,30 +1262,21 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * done outside of scope of rte_announce(). 
*/ static void -rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old, - rte *new_best, rte *old_best) +rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, + struct rte_storage *new_best, struct rte_storage *old_best) { - if (!rte_is_valid(new)) - new = NULL; - - if (!rte_is_valid(old)) - old = NULL; + int new_best_valid = rte_is_valid(RTE_OR_NULL(new_best)); + int old_best_valid = rte_is_valid(RTE_OR_NULL(old_best)); - if (!rte_is_valid(new_best)) - new_best = NULL; - - if (!rte_is_valid(old_best)) - old_best = NULL; - - if (!new && !old && !new_best && !old_best) + if ((new == old) && (new_best == old_best)) return; - if (new_best != old_best) + if (new_best_valid || old_best_valid) { - if (new_best) - new_best->sender->stats.pref_routes++; - if (old_best) - old_best->sender->stats.pref_routes--; + if (new_best_valid) + new_best->rte.sender->stats.pref++; + if (old_best_valid) + old_best->rte.sender->stats.pref--; if (tab->hostcache) rt_notify_hostcache(tab, net); @@ -1266,103 +1287,255 @@ rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old, rt_schedule_notify(tab); - struct channel *c; node *n; - WALK_LIST2(c, n, tab->channels, table_node) + if (EMPTY_LIST(tab->exporter.hooks) && EMPTY_LIST(tab->exporter.pending)) { - if (c->export_state == ES_DOWN) - continue; + /* No export hook and no pending exports to cleanup. We may free the route immediately. */ + if (!old) + return; - if (type && (type != c->ra_mode)) - continue; + hmap_clear(&tab->id_map, old->rte.id); + rte_free(old); + return; + } + + /* Get the pending export structure */ + struct rt_export_block *rpeb = NULL, *rpebsnl = NULL; + u32 end = 0; - switch (c->ra_mode) + if (!EMPTY_LIST(tab->exporter.pending)) + { + rpeb = TAIL(tab->exporter.pending); + end = atomic_load_explicit(&rpeb->end, memory_order_relaxed); + if (end >= RT_PENDING_EXPORT_ITEMS) { - case RA_OPTIMAL: - if (new_best != old_best) - rt_notify_basic(c, net, new_best, old_best, 0); - break; + ASSERT_DIE(end == RT_PENDING_EXPORT_ITEMS); + rpebsnl = rpeb; - case RA_ANY: - if (new != old) - rt_notify_basic(c, net, new, old, 0); - break; + rpeb = NULL; + end = 0; + } + } - case RA_ACCEPTED: - rt_notify_accepted(c, net, new, old, 0); - break; + if (!rpeb) + { + rpeb = alloc_page(); + *rpeb = (struct rt_export_block) {}; + add_tail(&tab->exporter.pending, &rpeb->n); + } - case RA_MERGED: - rt_notify_merged(c, net, new, old, new_best, old_best, 0); - break; + /* Fill the pending export */ + struct rt_pending_export *rpe = &rpeb->export[rpeb->end]; + *rpe = (struct rt_pending_export) { + .new = new, + .new_best = new_best, + .old = old, + .old_best = old_best, + .seq = tab->exporter.next_seq++, + }; + + DBGL("rte_announce: table=%s net=%N new=%p id %u from %s old=%p id %u from %s new_best=%p id %u old_best=%p id %u seq=%lu", + tab->name, net->n.addr, + new, new ? new->rte.id : 0, new ? new->rte.sender->req->name : NULL, + old, old ? old->rte.id : 0, old ? 
old->rte.sender->req->name : NULL, + new_best, old_best, rpe->seq); + + ASSERT_DIE(atomic_fetch_add_explicit(&rpeb->end, 1, memory_order_release) == end); + + if (rpebsnl) + { + _Bool f = 0; + ASSERT_DIE(atomic_compare_exchange_strong_explicit(&rpebsnl->not_last, &f, 1, + memory_order_release, memory_order_relaxed)); + } + + /* Append to the same-network squasher list */ + if (net->last) + { + struct rt_pending_export *rpenull = NULL; + ASSERT_DIE(atomic_compare_exchange_strong_explicit( + &net->last->next, &rpenull, rpe, + memory_order_relaxed, + memory_order_relaxed)); + + } + + net->last = rpe; + + if (!net->first) + net->first = rpe; + + if (tab->exporter.first == NULL) + tab->exporter.first = rpe; + + rt_check_cork_high(tab); + + if (!tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); +} + +static struct rt_pending_export * +rt_next_export_fast(struct rt_pending_export *last) +{ + /* Get the whole export block and find our position in there. */ + struct rt_export_block *rpeb = PAGE_HEAD(last); + u32 pos = (last - &rpeb->export[0]); + u32 end = atomic_load_explicit(&rpeb->end, memory_order_acquire); + ASSERT_DIE(pos < end); + + /* Next is in the same block. */ + if (++pos < end) + return &rpeb->export[pos]; + + /* There is another block. */ + if (atomic_load_explicit(&rpeb->not_last, memory_order_acquire)) + { + /* This is OK to do non-atomically because of the not_last flag. */ + rpeb = NODE_NEXT(rpeb); + return &rpeb->export[0]; + } + + /* There is nothing more. */ + return NULL; +} + +static struct rt_pending_export * +rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab) +{ + /* As the table is locked, it is safe to reload the last export pointer */ + struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire); + + /* It is still valid, let's reuse it */ + if (last) + return rt_next_export_fast(last); + + /* No, therefore we must process the table's first pending export */ + else + return tab->first; +} + +static inline void +rt_send_export_event(struct rt_export_hook *hook) +{ + ev_send(hook->req->list, hook->event); +} + +static void +rt_announce_exports(timer *tm) +{ + rtable *tab = tm->data; + + struct rt_export_hook *c; node *n; + WALK_LIST2(c, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) + continue; + + rt_send_export_event(c); + } +} + +static struct rt_pending_export * +rt_last_export(struct rt_exporter *tab) +{ + struct rt_pending_export *rpe = NULL; + + if (!EMPTY_LIST(tab->pending)) + { + /* We'll continue processing exports from this export on */ + struct rt_export_block *reb = TAIL(tab->pending); + ASSERT_DIE(reb->end); + rpe = &reb->export[reb->end - 1]; + } + + return rpe; +} + +#define RT_EXPORT_BULK 1024 + +static void +rt_export_hook(void *_data) +{ + struct rt_export_hook *c = _data; + + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_READY); + + if (!c->rpe_next) + { + c->rpe_next = rt_next_export(c, c->table); + + if (!c->rpe_next) + { + CALL(c->table->used, c->table); + return; } } + + /* Process the export */ + for (uint i=0; i<RT_EXPORT_BULK; i++) + { + rte_export(c, c->rpe_next); + + if (!c->rpe_next) + break; + } + + rt_send_export_event(c); } + static inline int -rte_validate(rte *e) +rte_validate(struct channel *ch, rte *e) { int c; - net *n = e->net; + const net_addr *n = e->net; - if (!net_validate(n->n.addr)) + if (!net_validate(n)) { log(L_WARN 
"Ignoring bogus prefix %N received via %s", - n->n.addr, e->sender->proto->name); + n, ch->proto->name); return 0; } /* FIXME: better handling different nettypes */ - c = !net_is_flow(n->n.addr) ? - net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE); + c = !net_is_flow(n) ? + net_classify(n): (IADDR_HOST | SCOPE_UNIVERSE); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) { log(L_WARN "Ignoring bogus route %N received via %s", - n->n.addr, e->sender->proto->name); + n, ch->proto->name); return 0; } - if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest) + if (net_type_match(n, NB_DEST)) { - /* Exception for flowspec that failed validation */ - if (net_is_flow(n->n.addr) && (e->attrs->dest == RTD_UNREACHABLE)) - return 1; + eattr *nhea = ea_find(e->attrs, &ea_gen_nexthop); + int dest = nhea_dest(nhea); - log(L_WARN "Ignoring route %N with invalid dest %d received via %s", - n->n.addr, e->attrs->dest, e->sender->proto->name); - return 0; - } + if (dest == RTD_NONE) + { + log(L_WARN "Ignoring route %N with no destination received via %s", + n, ch->proto->name); + return 0; + } - if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) + if ((dest == RTD_UNICAST) && + !nexthop_is_sorted((struct nexthop_adata *) nhea->u.ptr)) + { + log(L_WARN "Ignoring unsorted multipath route %N received via %s", + n, ch->proto->name); + return 0; + } + } + else if (ea_find(e->attrs, &ea_gen_nexthop)) { - log(L_WARN "Ignoring unsorted multipath route %N received via %s", - n->n.addr, e->sender->proto->name); + log(L_WARN "Ignoring route %N having a nexthop attribute received via %s", + n, ch->proto->name); return 0; } return 1; } -/** - * rte_free - delete a &rte - * @e: &rte to be deleted - * - * rte_free() deletes the given &rte from the routing table it's linked to. - */ -void -rte_free(rte *e) -{ - if (rta_is_cached(e->attrs)) - rta_free(e->attrs); - sl_free(rte_slab, e); -} - -static inline void -rte_free_quick(rte *e) -{ - rta_free(e->attrs); - sl_free(rte_slab, e); -} - static int rte_same(rte *x, rte *y) { @@ -1370,175 +1543,116 @@ rte_same(rte *x, rte *y) return x->attrs == y->attrs && x->pflags == y->pflags && - x->pref == y->pref && - (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y)) && + x->src == y->src && rte_is_filtered(x) == rte_is_filtered(y); } static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } static void -rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) +rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) { - struct proto *p = c->proto; + struct rt_import_request *req = c->req; struct rtable *table = c->table; - struct proto_stats *stats = &c->stats; - static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS; - rte *before_old = NULL; - rte *old_best = net->routes; + struct rt_import_stats *stats = &c->stats; + struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; + rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; rte *old = NULL; - rte **k; - k = &net->routes; /* Find and remove original route from the same protocol */ - while (old = *k) + /* If the new route is identical to the old one, we find the attributes in + * cache and clone these with no performance drop. OTOH, if we were to lookup + * the attributes, such a route definitely hasn't been anywhere yet, + * therefore it's definitely worth the time. 
*/ + struct rte_storage *new_stored = NULL; + if (new) + new = &(new_stored = rte_store(new, net, table))->rte; + + /* Find and remove original route from the same protocol */ + struct rte_storage **before_old = rte_find(net, src); + + if (*before_old) { - if (old->attrs->src == src) + old = &(old_stored = (*before_old))->rte; + + /* If there is the same route in the routing table but from + * a different sender, then there are two paths from the + * source protocol to this routing table through transparent + * pipes, which is not allowed. + * We log that and ignore the route. */ + if (old->sender != c) { - /* If there is the same route in the routing table but from - * a different sender, then there are two paths from the - * source protocol to this routing table through transparent - * pipes, which is not allowed. - * - * We log that and ignore the route. If it is withdraw, we - * ignore it completely (there might be 'spurious withdraws', - * see FIXME in do_rte_announce()) - */ - if (old->sender->proto != p) - { - if (new) - { - log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s", - net->n.addr, table->name); - rte_free_quick(new); - } - return; - } + if (!old->generation && !new->generation) + bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); - if (new && rte_same(old, new)) + log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); + } + + if (new && rte_same(old, &new_stored->rte)) { /* No changes, ignore the new route and refresh the old one */ - - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); + old->stale_cycle = new->stale_cycle; if (!rte_is_filtered(new)) { - stats->imp_updates_ignored++; - rte_trace_in(D_ROUTES, c, new, "ignored"); + stats->updates_ignored++; + rt_rte_trace_in(D_ROUTES, req, new, "ignored"); } - rte_free_quick(new); + /* We need to free the already stored route here before returning */ + rte_free(new_stored); return; - } - *k = old->next; - table->rt_count--; - break; - } - k = &old->next; - before_old = old; - } - - /* Save the last accessed position */ - rte **pos = k; + } - if (!old) - before_old = NULL; + *before_old = (*before_old)->next; + table->rt_count--; + } if (!old && !new) { - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; return; } + /* If rejected by import limit, we need to pretend there is no route */ + if (req->preimport && (req->preimport(req, new, old) == 0)) + { + rte_free(new_stored); + new_stored = NULL; + new = NULL; + } + int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); - struct channel_limit *l = &c->rx_limit; - if (l->action && !old && new && !c->in_table) - { - u32 all_routes = stats->imp_routes + stats->filt_routes; - - if (all_routes >= l->limit) - channel_notify_limit(c, l, PLD_RX, all_routes); - - if (l->state == PLS_BLOCKED) - { - /* In receive limit the situation is simple, old is NULL so - we just free new and exit like nothing happened */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - rte_free_quick(new); - return; - } - } - - l = &c->in_limit; - if (l->action && !old_ok && new_ok) - { - if (stats->imp_routes >= l->limit) - channel_notify_limit(c, l, PLD_IN, stats->imp_routes); - - if (l->state == PLS_BLOCKED) - { - /* In import limit the situation is more complicated. 
We - shouldn't just drop the route, we should handle it like - it was filtered. We also have to continue the route - processing if old or new is non-NULL, but we should exit - if both are NULL as this case is probably assumed to be - already handled. */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - - if (c->in_keep_filtered) - new->flags |= REF_FILTERED; - else - { rte_free_quick(new); new = NULL; } - - /* Note that old && !new could be possible when - c->in_keep_filtered changed in the recent past. */ - - if (!old && !new) - return; - - new_ok = 0; - goto skip_stats1; - } - } - if (new_ok) - stats->imp_updates_accepted++; + stats->updates_accepted++; else if (old_ok) - stats->imp_withdraws_accepted++; + stats->withdraws_accepted++; else - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; if (old_ok || new_ok) table->last_rt_change = current_time(); - skip_stats1: - - if (new) - rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++; - if (old) - rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--; - if (table->config->sorted) { /* If routes are sorted, just insert new route to appropriate position */ - if (new) + if (new_stored) { - if (before_old && !rte_better(new, before_old)) - k = &before_old->next; + struct rte_storage **k; + if ((before_old != &net->routes) && !rte_better(new, &SKIP_BACK(struct rte_storage, next, before_old)->rte)) + k = before_old; else k = &net->routes; for (; *k; k=&(*k)->next) - if (rte_better(new, *k)) + if (rte_better(new, &(*k)->rte)) break; - new->next = *k; - *k = new; + new_stored->next = *k; + *k = new_stored; table->rt_count++; } @@ -1548,16 +1662,17 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) /* If routes are not sorted, find the best route and move it on the first position. There are several optimized cases. */ - if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best)) + if (src->owner->rte_recalculate && + src->owner->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best)) goto do_recalculate; - if (new && rte_better(new, old_best)) + if (new_stored && rte_better(&new_stored->rte, old_best)) { /* The first case - the new route is cleary optimal, we link it at the first position */ - new->next = net->routes; - net->routes = new; + new_stored->next = net->routes; + net->routes = new_stored; table->rt_count++; } @@ -1571,10 +1686,10 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) do_recalculate: /* Add the new route to the list */ - if (new) + if (new_stored) { - new->next = *pos; - *pos = new; + new_stored->next = *before_old; + *before_old = new_stored; table->rt_count++; } @@ -1582,335 +1697,417 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) /* Find a new optimal route (if there is any) */ if (net->routes) { - rte **bp = &net->routes; - for (k=&(*bp)->next; *k; k=&(*k)->next) - if (rte_better(*k, *bp)) + struct rte_storage **bp = &net->routes; + for (struct rte_storage **k=&(*bp)->next; *k; k=&(*k)->next) + if (rte_better(&(*k)->rte, &(*bp)->rte)) bp = k; /* And relink it */ - rte *best = *bp; + struct rte_storage *best = *bp; *bp = best->next; best->next = net->routes; net->routes = best; } } - else if (new) + else if (new_stored) { /* The third case - the new route is not better than the old best route (therefore old_best != NULL) and the old best route was not removed (therefore old_best == net->routes). 
We just link the new route to the old/last position. */ - new->next = *pos; - *pos = new; + new_stored->next = *before_old; + *before_old = new_stored; table->rt_count++; } /* The fourth (empty) case - suboptimal route was removed, nothing to do */ } - if (new) + if (new_stored) { - new->lastmod = current_time(); - - if (!old) - { - new->id = hmap_first_zero(&table->id_map); - hmap_set(&table->id_map, new->id); - } - else - new->id = old->id; + new_stored->rte.lastmod = current_time(); + new_stored->rte.id = hmap_first_zero(&table->id_map); + hmap_set(&table->id_map, new_stored->rte.id); } /* Log the route change */ - if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES)) + if (new_ok) + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, new_stored == net->routes ? "added [best]" : "added"); + else if (old_ok) { - if (new_ok) - rte_trace(c, new, '>', new == net->routes ? "added [best]" : "added"); - else if (old_ok) - { - if (old != old_best) - rte_trace(c, old, '>', "removed"); - else if (rte_is_ok(net->routes)) - rte_trace(c, old, '>', "removed [replaced]"); - else - rte_trace(c, old, '>', "removed [sole]"); - } + if (old != old_best) + rt_rte_trace_in(D_ROUTES, req, old, "removed"); + else if (net->routes && rte_is_ok(&net->routes->rte)) + rt_rte_trace_in(D_ROUTES, req, old, "removed [replaced]"); + else + rt_rte_trace_in(D_ROUTES, req, old, "removed [sole]"); } /* Propagate the route change */ - rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best); + rte_announce(table, net, new_stored, old_stored, + net->routes, old_best_stored); if (!net->routes && (table->gc_counter++ >= table->config->gc_threshold)) rt_kick_prune_timer(table); +#if 0 + /* Enable and reimplement these callbacks if anybody wants to use them */ if (old_ok && p->rte_remove) p->rte_remove(net, old); if (new_ok && p->rte_insert) - p->rte_insert(net, new); - - if (old) - { - if (!new) - hmap_clear(&table->id_map, old->id); + p->rte_insert(net, &new_stored->rte); +#endif - rte_free_quick(old); - } } -static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */ - -static inline void -rte_update_lock(void) +int +channel_preimport(struct rt_import_request *req, rte *new, rte *old) { - rte_update_nest_cnt++; -} + struct channel *c = SKIP_BACK(struct channel, in_req, req); -static inline void -rte_update_unlock(void) -{ - if (!--rte_update_nest_cnt) - lp_flush(rte_update_pool); -} + if (new && !old) + if (CHANNEL_LIMIT_PUSH(c, RX)) + return 0; -static inline void -rte_hide_dummy_routes(net *net, rte **dummy) -{ - if (net->routes && net->routes->attrs->source == RTS_DUMMY) - { - *dummy = net->routes; - net->routes = (*dummy)->next; - } -} + if (!new && old) + CHANNEL_LIMIT_POP(c, RX); -static inline void -rte_unhide_dummy_routes(net *net, rte **dummy) -{ - if (*dummy) - { - (*dummy)->next = net->routes; - net->routes = *dummy; - } -} + int new_in = new && !rte_is_filtered(new); + int old_in = old && !rte_is_filtered(old); -/** - * rte_update - enter a new update to a routing table - * @table: table to be updated - * @c: channel doing the update - * @net: network node - * @p: protocol submitting the update - * @src: protocol originating the update - * @new: a &rte representing the new route or %NULL for route removal. - * - * This function is called by the routing protocols whenever they discover - * a new route or wish to update/remove an existing route. 
The right announcement - * sequence is to build route attributes first (either un-cached with @aflags set - * to zero or a cached one using rta_lookup(); in this case please note that - * you need to increase the use count of the attributes yourself by calling - * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all - * the appropriate data and finally submit the new &rte by calling rte_update(). - * - * @src specifies the protocol that originally created the route and the meaning - * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the - * same value as @new->attrs->proto. @p specifies the protocol that called - * rte_update(). In most cases it is the same protocol as @src. rte_update() - * stores @p in @new->sender; - * - * When rte_update() gets any route, it automatically validates it (checks, - * whether the network and next hop address are valid IP addresses and also - * whether a normal routing protocol doesn't try to smuggle a host or link - * scope route to the table), converts all protocol dependent attributes stored - * in the &rte to temporary extended attributes, consults import filters of the - * protocol to see if the route should be accepted and/or its attributes modified, - * stores the temporary attributes back to the &rte. - * - * Now, having a "public" version of the route, we - * automatically find any old route defined by the protocol @src - * for network @n, replace it by the new one (or removing it if @new is %NULL), - * recalculate the optimal route for this destination and finally broadcast - * the change (if any) to all routing protocols by calling rte_announce(). - * - * All memory used for attribute lists and other temporary allocations is taken - * from a special linear pool @rte_update_pool and freed when rte_update() - * finishes. 
- */ + if (new_in && !old_in) + if (CHANNEL_LIMIT_PUSH(c, IN)) + if (c->in_keep & RIK_REJECTED) + { + new->flags |= REF_FILTERED; + return 1; + } + else + return 0; + + if (!new_in && old_in) + CHANNEL_LIMIT_POP(c, IN); + + return 1; +} void -rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) +rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { - // struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; - const struct filter *filter = c->in_filter; - rte *dummy = NULL; - net *nn; + if (!c->in_req.hook) + return; ASSERT(c->channel_state == CS_UP); - rte_update_lock(); + /* The import reloader requires prefilter routes to be the first layer */ + if (new && (c->in_keep & RIK_PREFILTER)) + if (ea_is_cached(new->attrs) && !new->attrs->next) + new->attrs = ea_clone(new->attrs); + else + new->attrs = ea_lookup(new->attrs, 0); + + const struct filter *filter = c->in_filter; + struct channel_import_stats *stats = &c->import_stats; + if (new) { - /* Create a temporary table node */ - nn = alloca(sizeof(net) + n->length); - memset(nn, 0, sizeof(net) + n->length); - net_copy(nn->n.addr, n); + new->net = n; - new->net = nn; - new->sender = c; + int fr; - if (!new->pref) - new->pref = c->preference; - - stats->imp_updates_received++; - if (!rte_validate(new)) + stats->updates_received++; + if ((filter == FILTER_REJECT) || + ((fr = f_run(filter, new, 0)) > F_ACCEPT)) { - rte_trace_in(D_FILTERS, c, new, "invalid"); - stats->imp_updates_invalid++; - goto drop; + stats->updates_filtered++; + channel_rte_trace_in(D_FILTERS, c, new, "filtered out"); + + if (c->in_keep & RIK_REJECTED) + new->flags |= REF_FILTERED; + else + new = NULL; } - if (filter == FILTER_REJECT) + if (new) + if (net_is_flow(n)) + rt_flowspec_resolve_rte(new, c); + else + rt_next_hop_resolve_rte(new); + + if (new && !rte_validate(c, new)) { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); + channel_rte_trace_in(D_FILTERS, c, new, "invalid"); + stats->updates_invalid++; + new = NULL; + } - if (! c->in_keep_filtered) - goto drop; + } + else + stats->withdraws_received++; - /* new is a private copy, i could modify it */ - new->flags |= REF_FILTERED; - } - else if (filter) - { - rta *old_attrs = NULL; - rte_make_tmp_attrs(&new, rte_update_pool, &old_attrs); + rte_import(&c->in_req, n, new, src); - int fr = f_run(filter, &new, rte_update_pool, 0); - if (fr > F_ACCEPT) - { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); + /* Now the route attributes are kept by the in-table cached version + * and we may drop the local handle */ + if (new && (c->in_keep & RIK_PREFILTER)) + { + /* There may be some updates on top of the original attribute block */ + ea_list *a = new->attrs; + while (a->next) + a = a->next; - if (! 
c->in_keep_filtered) - { - rta_free(old_attrs); - goto drop; - } + ea_free(a); + } - new->flags |= REF_FILTERED; - } +} - rte_store_tmp_attrs(new, rte_update_pool, old_attrs); - } - if (!rta_is_cached(new->attrs)) /* Need to copy attributes */ - new->attrs = rta_lookup(new->attrs); - new->flags |= REF_COW; +void +rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rte_src *src) +{ + struct rt_import_hook *hook = req->hook; + if (!hook) + return; + net *nn; + if (new) + { /* Use the actual struct network, not the dummy one */ - nn = net_get(c->table, n); - new->net = nn; + nn = net_get(hook->table, n); + new->net = nn->n.addr; + new->sender = hook; + + /* Set the stale cycle */ + new->stale_cycle = hook->stale_set; } - else + else if (!(nn = net_find(hook->table, n))) { - stats->imp_withdraws_received++; - - if (!(nn = net_find(c->table, n)) || !src) - { - stats->imp_withdraws_ignored++; - rte_update_unlock(); - return; - } + req->hook->stats.withdraws_ignored++; + return; } - recalc: /* And recalculate the best route */ - rte_hide_dummy_routes(nn, &dummy); - rte_recalculate(c, nn, new, src); - rte_unhide_dummy_routes(nn, &dummy); + rte_recalculate(hook, nn, new, src); +} + +/* Check rtable for best route to given net whether it would be exported do p */ +int +rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) +{ + net *n = net_find(t, a); + + if (!n || !rte_is_valid(RTE_OR_NULL(n->routes))) + return 0; - rte_update_unlock(); - return; + rte rt = n->routes->rte; - drop: - rte_free(new); - new = NULL; - if (nn = net_find(c->table, n)) - goto recalc; + /* Rest is stripped down export_filter() */ + int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0; + if (v == RIC_PROCESS) + v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT); - rte_update_unlock(); + return v > 0; } -/* Independent call to rte_announce(), used from next hop - recalculation, outside of rte_update(). new must be non-NULL */ -static inline void -rte_announce_i(rtable *tab, uint type, net *net, rte *new, rte *old, - rte *new_best, rte *old_best) +static void +rt_table_export_done(struct rt_export_hook *hook) { - rte_update_lock(); - rte_announce(tab, type, net, new, old, new_best, old_best); - rte_update_unlock(); + struct rt_exporter *re = hook->table; + struct rtable *tab = SKIP_BACK(struct rtable, exporter, re); + + rt_unlock_table(tab); + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); } -static inline void -rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collection */ +static void +rt_export_stopped(void *data) { - rte_update_lock(); - rte_recalculate(old->sender, old->net, NULL, old->attrs->src); - rte_update_unlock(); + struct rt_export_hook *hook = data; + struct rt_exporter *tab = hook->table; + + /* Drop pending exports */ + CALL(tab->used, tab); + + /* Unlist */ + rem_node(&hook->n); + + /* Report the channel as stopped. */ + hook->stopped(hook->req); + + /* Reporting the hook as finished. */ + CALL(tab->done, hook); + + /* Free the hook. 
*/ + rfree(hook->pool); } -/* Modify existing route by protocol hook, used for long-lived graceful restart */ static inline void -rte_modify(rte *old) +rt_set_import_state(struct rt_import_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + hook->import_state = state; + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +void +rt_set_export_state(struct rt_export_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + atomic_store_explicit(&hook->export_state, state, memory_order_release); + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +void +rt_request_import(rtable *tab, struct rt_import_request *req) +{ + rt_lock_table(tab); + + struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); + + DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); + + hook->req = req; + hook->table = tab; + + rt_set_import_state(hook, TIS_UP); + + hook->n = (node) {}; + add_tail(&tab->imports, &hook->n); +} + +void +rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_request *)) { - rte_update_lock(); + ASSERT_DIE(req->hook); + struct rt_import_hook *hook = req->hook; - rte *new = old->sender->proto->rte_modify(old, rte_update_pool); - if (new != old) + rt_schedule_prune(hook->table); + + rt_set_import_state(hook, TIS_STOP); + + hook->stopped = stopped; +} + +static struct rt_export_hook * +rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) +{ + rtable *tab = SKIP_BACK(rtable, exporter, re); + rt_lock_table(tab); + + pool *p = rp_new(tab->rp, "Export hook"); + struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); + hook->pool = p; + + /* stats zeroed by mb_allocz */ + switch (req->addr_mode) { - if (new) - { - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - new->flags = (old->flags & ~REF_MODIFY) | REF_COW; - } + case TE_ADDR_IN: + if (tab->trie && net_val_match(tab->addr_type, NB_IP)) + { + hook->walk_state = mb_allocz(p, sizeof (struct f_trie_walk_state)); + hook->walk_lock = rt_lock_trie(tab); + trie_walk_init(hook->walk_state, tab->trie, req->addr); + hook->event = ev_new_init(p, rt_feed_by_trie, hook); + break; + } + /* fall through */ + case TE_ADDR_NONE: + FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + hook->event = ev_new_init(p, rt_feed_by_fib, hook); + break; + + case TE_ADDR_EQUAL: + hook->event = ev_new_init(p, rt_feed_equal, hook); + break; + + case TE_ADDR_FOR: + hook->event = ev_new_init(p, rt_feed_for, hook); + break; - rte_recalculate(old->sender, old->net, new, old->attrs->src); + default: + bug("Requested an unknown export address mode"); } - rte_update_unlock(); + DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); + + return hook; } -/* Check rtable for best route to given net whether it would be exported do p */ -int -rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) +void +rt_request_export(struct rt_exporter *re, struct rt_export_request *req) { - struct proto *p = c->proto; - net *n = net_find(t, a); - rte *rt = n ? n->routes : NULL; + struct rt_export_hook *hook = req->hook = re->start(re, req); - if (!rte_is_valid(rt)) - return 0; + hook->req = req; + hook->table = re; - rte_update_lock(); + bmap_init(&hook->seq_map, hook->pool, 1024); - /* Rest is stripped down export_filter() */ - int v = p->preexport ? 
p->preexport(c, &rt, rte_update_pool) : 0; - if (v == RIC_PROCESS) + struct rt_pending_export *rpe = rt_last_export(hook->table); + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0); + atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); + + hook->n = (node) {}; + add_tail(&re->hooks, &hook->n); + + /* Regular export */ + rt_set_export_state(hook, TES_FEEDING); + rt_send_export_event(hook); +} + +static void +rt_table_export_stop(struct rt_export_hook *hook) +{ + rtable *tab = SKIP_BACK(rtable, exporter, hook->table); + + if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) != TES_FEEDING) + return; + + switch (hook->req->addr_mode) { - rte_make_tmp_attrs(&rt, rte_update_pool, NULL); - v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT); + case TE_ADDR_IN: + if (hook->walk_lock) + { + rt_unlock_trie(tab, hook->walk_lock); + hook->walk_lock = NULL; + mb_free(hook->walk_state); + hook->walk_state = NULL; + break; + } + /* fall through */ + case TE_ADDR_NONE: + fit_get(&tab->fib, &hook->feed_fit); + break; } +} - /* Discard temporary rte */ - if (rt != n->routes) - rte_free(rt); +void +rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *)) +{ + ASSERT_DIE(req->hook); + struct rt_export_hook *hook = req->hook; - rte_update_unlock(); + /* Cancel the feeder event */ + ev_postpone(hook->event); - return v > 0; -} + /* Stop feeding from the exporter */ + CALL(hook->table->stop, hook); + /* Reset the event as the stopped event */ + hook->event->hook = rt_export_stopped; + hook->stopped = stopped; + + /* Update export state */ + rt_set_export_state(hook, TES_STOP); + + /* Run the stopped event */ + rt_send_export_event(hook); +} /** * rt_refresh_begin - start a refresh cycle @@ -1927,16 +2124,38 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte * flag in rt_refresh_end() and then removing such routes in the prune loop. */ void -rt_refresh_begin(rtable *t, struct channel *c) +rt_refresh_begin(struct rt_import_request *req) { - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if (e->sender == c) - e->flags |= REF_STALE; - } - FIB_WALK_END; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); + ASSERT_DIE(hook->stale_set == hook->stale_valid); + + /* If the pruning routine is too slow */ + if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid) + || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128)) + { + log(L_WARN "Route refresh flood in table %s", hook->table->name); + FIB_WALK(&hook->table->fib, net, n) + { + for (struct rte_storage *e = n->routes; e; e = e->next) + if (e->rte.sender == req->hook) + e->rte.stale_cycle = 0; + } + FIB_WALK_END; + hook->stale_set = 1; + hook->stale_valid = 0; + hook->stale_pruned = 0; + } + /* Setting a new value of the stale modifier */ + else if (!++hook->stale_set) + { + /* Let's reserve the stale_cycle zero value for always-invalid routes */ + hook->stale_set = 1; + hook->stale_valid = 0; + } + + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set); } /** @@ -1948,45 +2167,18 @@ rt_refresh_begin(rtable *t, struct channel *c) * hook. See rt_refresh_begin() for description of refresh cycles. 
*/ void -rt_refresh_end(rtable *t, struct channel *c) +rt_refresh_end(struct rt_import_request *req) { - int prune = 0; - - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if ((e->sender == c) && (e->flags & REF_STALE)) - { - e->flags |= REF_DISCARD; - prune = 1; - } - } - FIB_WALK_END; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); - if (prune) - rt_schedule_prune(t); -} + hook->stale_valid++; + ASSERT_DIE(hook->stale_set == hook->stale_valid); -void -rt_modify_stale(rtable *t, struct channel *c) -{ - int prune = 0; + rt_schedule_prune(hook->table); - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED)) - { - e->flags |= REF_MODIFY; - prune = 1; - } - } - FIB_WALK_END; - - if (prune) - rt_schedule_prune(t); + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); } /** @@ -1996,14 +2188,11 @@ rt_modify_stale(rtable *t, struct channel *c) * This functions dumps contents of a &rte to debug output. */ void -rte_dump(rte *e) -{ - net *n = e->net; - debug("%-1N ", n->n.addr); - debug("PF=%02x pref=%d ", e->pflags, e->pref); - rta_dump(e->attrs); - if (e->attrs->src->proto->proto->dump_attrs) - e->attrs->src->proto->proto->dump_attrs(e); +rte_dump(struct rte_storage *e) +{ + debug("%-1N ", e->rte.net); + debug("PF=%02x ", e->rte.pflags); + ea_dump(e->rte.attrs); debug("\n"); } @@ -2016,14 +2205,13 @@ rte_dump(rte *e) void rt_dump(rtable *t) { - debug("Dump of routing table <%s>\n", t->name); + debug("Dump of routing table <%s>%s\n", t->name, t->deleted ? " (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); #endif FIB_WALK(&t->fib, net, n) { - rte *e; - for(e=n->routes; e; e=e->next) + for(struct rte_storage *e=n->routes; e; e=e->next) rte_dump(e); } FIB_WALK_END; @@ -2043,6 +2231,54 @@ rt_dump_all(void) WALK_LIST2(t, n, routing_tables, n) rt_dump(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump(t); +} + +void +rt_dump_hooks(rtable *tab) +{ + debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->deleted ? 
" (deleted)" : ""); + debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", + tab->nhu_state, tab->hcu_scheduled, tab->use_count, tab->rt_count); + debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", + tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); + + struct rt_import_hook *ih; + WALK_LIST(ih, tab->imports) + { + ih->req->dump_req(ih->req); + debug(" Import hook %p requested by %p: pref=%u" + " last_state_change=%t import_state=%u stopped=%p\n", + ih, ih->req, ih->stats.pref, + ih->last_state_change, ih->import_state, ih->stopped); + } + + struct rt_export_hook *eh; + WALK_LIST(eh, tab->exporter.hooks) + { + eh->req->dump_req(eh->req); + debug(" Export hook %p requested by %p:" + " refeed_pending=%u last_state_change=%t export_state=%u\n", + eh, eh->req, eh->refeed_pending, eh->last_state_change, atomic_load_explicit(&eh->export_state, memory_order_relaxed)); + } + debug("\n"); +} + +void +rt_dump_hooks_all(void) +{ + rtable *t; + node *n; + + debug("Dump of all table hooks\n"); + + WALK_LIST2(t, n, routing_tables, n) + rt_dump_hooks(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump_hooks(t); } static inline void @@ -2078,6 +2314,20 @@ rt_schedule_prune(rtable *tab) tab->prune_state |= 1; } +static void +rt_export_used(struct rt_exporter *e) +{ + rtable *tab = SKIP_BACK(rtable, exporter, e); + + if (config->table_debug) + log(L_TRACE "%s: Export cleanup requested", tab->name); + + if (tab->export_used) + return; + + tab->export_used = 1; + ev_schedule(tab->rt_event); +} static void rt_event(void *ptr) @@ -2086,6 +2336,25 @@ rt_event(void *ptr) rt_lock_table(tab); + if (tab->export_used) + rt_export_cleanup(tab); + + if ( + tab->hcu_corked || + tab->nhu_corked || + (tab->hcu_scheduled || tab->nhu_state) && rt_cork_check(tab->uncork_event) + ) + { + if (!tab->hcu_corked && !tab->nhu_corked && config->table_debug) + log(L_TRACE "%s: Auxiliary routines corked", tab->name); + + tab->hcu_corked |= tab->hcu_scheduled; + tab->hcu_scheduled = 0; + + tab->nhu_corked |= tab->nhu_state; + tab->nhu_state = 0; + } + if (tab->hcu_scheduled) rt_update_hostcache(tab); @@ -2098,6 +2367,22 @@ rt_event(void *ptr) rt_unlock_table(tab); } +static void +rt_uncork_event(void *ptr) +{ + rtable *tab = ptr; + + tab->hcu_scheduled |= tab->hcu_corked; + tab->hcu_corked = 0; + + tab->nhu_state |= tab->nhu_corked; + tab->nhu_corked = 0; + + if (config->table_debug) + log(L_TRACE "%s: Auxiliary routines uncorked", tab->name); + + ev_schedule(tab->rt_event); +} static void rt_prune_timer(timer *t) @@ -2151,7 +2436,7 @@ rt_settle_timer(timer *t) struct rt_subscription *s; WALK_LIST(s, tab->subscribers) - s->hook(s); + ev_send(s->list, s->event); } static void @@ -2183,6 +2468,7 @@ rt_subscribe(rtable *tab, struct rt_subscription *s) { s->tab = tab; rt_lock_table(tab); + DBG("rt_subscribe(%s)\n", tab->name); add_tail(&tab->subscribers, &s->n); } @@ -2285,9 +2571,6 @@ rt_free(resource *_r) DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); - if (r->internal) - return; - r->config->table = NULL; rem_node(&r->n); @@ -2323,16 +2606,13 @@ static struct resclass rt_class = { rtable * rt_setup(pool *pp, struct rtable_config *cf) { - int ns = strlen("Routing table ") + strlen(cf->name) + 1; - void *nb = mb_alloc(pp, ns); - ASSERT_DIE(ns - 1 == bsnprintf(nb, ns, "Routing table %s", cf->name)); - - pool *p = rp_new(pp, nb); - mb_move(nb, p); + pool *p = rp_newf(pp, "Routing table %s", cf->name); rtable *t = ralloc(p, &rt_class); t->rp = p; + 
t->rte_slab = sl_new(p, sizeof(struct rte_storage)); + t->name = cf->name; t->config = cf; t->addr_type = cf->addr_type; @@ -2347,24 +2627,41 @@ rt_setup(pool *pp, struct rtable_config *cf) t->fib.init = net_init_with_trie; } - init_list(&t->channels); init_list(&t->flowspec_links); + + t->exporter = (struct rt_exporter) { + .addr_type = t->addr_type, + .start = rt_table_export_start, + .stop = rt_table_export_stop, + .done = rt_table_export_done, + .used = rt_export_used, + }; + + init_list(&t->exporter.hooks); + init_list(&t->exporter.pending); + + init_list(&t->imports); + + hmap_init(&t->id_map, p, 1024); + hmap_set(&t->id_map, 0); + init_list(&t->subscribers); - if (!(t->internal = cf->internal)) - { - hmap_init(&t->id_map, p, 1024); - hmap_set(&t->id_map, 0); + t->rt_event = ev_new_init(p, rt_event, t); + t->uncork_event = ev_new_init(p, rt_uncork_event, t); + t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); + t->exporter.export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0); + t->last_rt_change = t->gc_time = current_time(); + t->exporter.next_seq = 1; - t->rt_event = ev_new_init(p, rt_event, t); - t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); - t->last_rt_change = t->gc_time = current_time(); + t->cork_threshold = cf->cork_threshold; - if (rt_is_flow(t)) - { - t->flowspec_trie = f_new_trie(lp_new_default(p), 0); - t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); - } + t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + + if (rt_is_flow(t)) + { + t->flowspec_trie = f_new_trie(lp_new_default(p), 0); + t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); } return t; @@ -2381,9 +2678,10 @@ rt_init(void) { rta_init(); rt_table_pool = rp_new(&root_pool, "Routing tables"); - rte_update_pool = lp_new_default(rt_table_pool); - rte_slab = sl_new(rt_table_pool, sizeof(rte)); init_list(&routing_tables); + init_list(&deleted_routing_tables); + ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release"); + rt_cork.run = (event) { .hook = rt_cork_release_hook }; } @@ -2407,7 +2705,7 @@ rt_prune_table(rtable *tab) struct fib_iterator *fit = &tab->prune_fit; int limit = 2000; - struct channel *c; + struct rt_import_hook *ih; node *n, *x; DBG("Pruning route table %s\n", tab->name); @@ -2421,9 +2719,16 @@ rt_prune_table(rtable *tab) if (tab->prune_state == 1) { /* Mark channels to flush */ - WALK_LIST2(c, n, tab->channels, table_node) - if (c->channel_state == CS_FLUSHING) - c->flush_active = 1; + WALK_LIST2(ih, n, tab->imports, n) + if (ih->import_state == TIS_STOP) + rt_set_import_state(ih, TIS_FLUSHING); + else if ((ih->stale_valid != ih->stale_pruning) && (ih->stale_pruning == ih->stale_pruned)) + { + ih->stale_pruning = ih->stale_valid; + + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh begin [%u]", ih->req->name, ih->stale_pruning); + } FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; @@ -2442,8 +2747,6 @@ rt_prune_table(rtable *tab) again: FIB_ITERATE_START(&tab->fib, fit, net, n) { - rte *e; - rescan: if (limit <= 0) { @@ -2452,26 +2755,21 @@ again: return; } - for (e=n->routes; e; e=e->next) + for (struct rte_storage *e=n->routes; e; e=e->next) { - if (e->sender->flush_active || (e->flags & REF_DISCARD)) + struct rt_import_hook *s = e->rte.sender; + if ((s->import_state == TIS_FLUSHING) || + (e->rte.stale_cycle < s->stale_valid) || + (e->rte.stale_cycle > s->stale_set)) { - rte_discard(e); - limit--; - - goto rescan; - } - - if (e->flags & REF_MODIFY) - { - rte_modify(e); + rte_recalculate(e->rte.sender, n, 
NULL, e->rte.src); limit--; goto rescan; } } - if (!n->routes) /* Orphaned FIB entry */ + if (!n->routes && !n->first) /* Orphaned FIB entry */ { FIB_ITERATE_PUT(fit); fib_delete(&tab->fib, n); @@ -2524,21 +2822,203 @@ again: } } - if (tab->prune_state > 0) - ev_schedule(tab->rt_event); - - /* FIXME: This should be handled in a better way */ - rt_prune_sources(); + uint flushed_channels = 0; /* Close flushed channels */ - WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node) - if (c->flush_active) + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_FLUSHING) + { + ih->flush_seq = tab->exporter.next_seq; + rt_set_import_state(ih, TIS_WAITING); + flushed_channels++; + } + else if (ih->stale_pruning != ih->stale_pruned) + { + ih->stale_pruned = ih->stale_pruning; + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned); + } + + /* In some cases, we may want to directly proceed to export cleanup */ + if (EMPTY_LIST(tab->exporter.hooks) && flushed_channels) + rt_export_cleanup(tab); +} + +static void +rt_export_cleanup(rtable *tab) +{ + tab->export_used = 0; + + u64 min_seq = ~((u64) 0); + struct rt_pending_export *last_export_to_free = NULL; + struct rt_pending_export *first = tab->exporter.first; + + struct rt_export_hook *eh; + node *n; + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + switch (atomic_load_explicit(&eh->export_state, memory_order_acquire)) + { + case TES_DOWN: + continue; + + case TES_READY: + { + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (!last) + /* No last export means that the channel has exported nothing since last cleanup */ + goto done; + + else if (min_seq > last->seq) + { + min_seq = last->seq; + last_export_to_free = last; + } + continue; + } + + default: + /* It's only safe to cleanup when the export state is idle or regular. No feeding or stopping allowed. */ + goto done; + } + } + + tab->exporter.first = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL; + + if (config->table_debug) + log(L_TRACE "%s: Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld", + tab->name, + first ? first->seq : 0, + tab->exporter.first ? tab->exporter.first->seq : 0, + min_seq); + + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (last == last_export_to_free) + { + /* This may fail when the channel managed to export more inbetween. This is OK. */ + atomic_compare_exchange_strong_explicit( + &eh->last_export, &last, NULL, + memory_order_release, + memory_order_relaxed); + + DBG("store hook=%p last_export=NULL\n", eh); + } + } + + while (first && (first->seq <= min_seq)) + { + ASSERT_DIE(first->new || first->old); + + const net_addr *n = first->new ? 
+ first->new->rte.net : + first->old->rte.net; + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + + ASSERT_DIE(net->first == first); + + if (first == net->last) + /* The only export here */ + net->last = net->first = NULL; + else + /* First is now the next one */ + net->first = atomic_load_explicit(&first->next, memory_order_relaxed); + + /* For now, the old route may be finally freed */ + if (first->old) + { + rt_rte_trace_in(D_ROUTES, first->old->rte.sender->req, &first->old->rte, "freed"); + hmap_clear(&tab->id_map, first->old->rte.id); + rte_free(first->old); + } + +#ifdef LOCAL_DEBUG + memset(first, 0xbd, sizeof(struct rt_pending_export)); +#endif + + struct rt_export_block *reb = HEAD(tab->exporter.pending); + ASSERT_DIE(reb == PAGE_HEAD(first)); + + u32 pos = (first - &reb->export[0]); + u32 end = atomic_load_explicit(&reb->end, memory_order_relaxed); + ASSERT_DIE(pos < end); + + struct rt_pending_export *next = NULL; + + if (++pos < end) + next = &reb->export[pos]; + else + { + rem_node(&reb->n); + +#ifdef LOCAL_DEBUG + memset(reb, 0xbe, page_size); +#endif + + free_page(reb); + + if (EMPTY_LIST(tab->exporter.pending)) { - c->flush_active = 0; - channel_set_state(c, CS_DOWN); + if (config->table_debug) + log(L_TRACE "%s: Resetting export seq", tab->name); + + node *n; + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + ASSERT_DIE(atomic_load_explicit(&eh->last_export, memory_order_acquire) == NULL); + bmap_reset(&eh->seq_map, 1024); + } + + tab->exporter.next_seq = 1; + } + else + { + reb = HEAD(tab->exporter.pending); + next = &reb->export[0]; } + } + + first = next; + } + + rt_check_cork_low(tab); + +done:; + struct rt_import_hook *ih; node *x; + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_WAITING) + if (!first || (first->seq >= ih->flush_seq)) + { + ih->import_state = TIS_CLEARED; + ih->stopped(ih->req); + rem_node(&ih->n); + mb_free(ih); + rt_unlock_table(tab); + } + + if (tab->export_used) + ev_schedule(tab->rt_event); + - return; + if (EMPTY_LIST(tab->exporter.pending) && tm_active(tab->exporter.export_timer)) + tm_stop(tab->exporter.export_timer); +} + +static void +rt_cork_release_hook(void *data UNUSED) +{ + do synchronize_rcu(); + while ( + !atomic_load_explicit(&rt_cork.active, memory_order_acquire) && + ev_run_list(&rt_cork.queue) + ); } /** @@ -2637,134 +3117,168 @@ rt_postconfig(struct config *c) */ void -rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) { - a->hostentry = he; - a->dest = he->dest; - a->igp_metric = he->igp_metric; + struct { + struct adata ad; + struct hostentry *he; + u32 labels[lnum]; + } *head = (void *) tmp_alloc_adata(sizeof *head - sizeof(struct adata)); + + head->he = rt_get_hostentry(tab, gw, ll, dep); + memcpy(head->labels, labels, lnum * sizeof(u32)); + + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &ea_gen_hostentry, 0, &head->ad)); +} - if (a->dest != RTD_UNICAST) + +static void +rta_apply_hostentry(ea_list **to, struct hostentry_adata *head) +{ + struct hostentry *he = head->he; + u32 *labels = head->labels; + u32 lnum = (u32 *) (head->ad.data + head->ad.length) - labels; + + ea_set_attr_u32(to, &ea_gen_igp_metric, 0, he->igp_metric); + + if (!he->src) { - /* No nexthop */ -no_nexthop: - a->nh = (struct nexthop) {}; - if (mls) - { /* Store the label 
stack for later changes */ - a->nh.labels_orig = a->nh.labels = mls->len; - memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32)); - } + ea_set_dest(to, 0, RTD_UNREACHABLE); return; } - if (((!mls) || (!mls->len)) && he->nexthop_linkable) + eattr *he_nh_ea = ea_find(he->src, &ea_gen_nexthop); + ASSERT_DIE(he_nh_ea); + + struct nexthop_adata *nhad = (struct nexthop_adata *) he_nh_ea->u.ptr; + int idest = nhea_dest(he_nh_ea); + + if ((idest != RTD_UNICAST) || + !lnum && he->nexthop_linkable) { /* Just link the nexthop chain, no label append happens. */ - memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh))); + ea_copy_attr(to, he->src, &ea_gen_nexthop); return; } - struct nexthop *nhp = NULL, *nhr = NULL; - int skip_nexthop = 0; + uint total_size = OFFSETOF(struct nexthop_adata, nh); - for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { - if (skip_nexthop) - skip_nexthop--; - else + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) { - nhr = nhp; - nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", + nh->labels, lnum, nh->labels + lnum, MPLS_MAX_LABEL_STACK); + continue; } - memset(nhp, 0, NEXTHOP_MAX_SIZE); - nhp->iface = nh->iface; - nhp->weight = nh->weight; + total_size += NEXTHOP_SIZE_CNT(nh->labels + lnum); + } - if (mls) - { - nhp->labels = nh->labels + mls->len; - nhp->labels_orig = mls->len; - if (nhp->labels <= MPLS_MAX_LABEL_STACK) - { - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */ - memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */ - } - else - { - log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", - nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK); - skip_nexthop++; - continue; - } - } - else if (nh->labels) + if (total_size == OFFSETOF(struct nexthop_adata, nh)) + { + log(L_WARN "No valid nexthop remaining, setting route unreachable"); + + struct nexthop_adata nha = { + .ad.length = NEXTHOP_DEST_SIZE, + .dest = RTD_UNREACHABLE, + }; + + ea_set_attr_data(to, &ea_gen_nexthop, 0, &nha.ad.data, nha.ad.length); + return; + } + + struct nexthop_adata *new = (struct nexthop_adata *) tmp_alloc_adata(total_size); + struct nexthop *dest = &new->nh; + + NEXTHOP_WALK(nh, nhad) + { + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) + continue; + + memcpy(dest, nh, NEXTHOP_SIZE(nh)); + if (lnum) { - nhp->labels = nh->labels; - nhp->labels_orig = 0; - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); + memcpy(&(dest->label[dest->labels]), labels, lnum * sizeof labels[0]); + dest->labels += lnum; } if (ipa_nonzero(nh->gw)) - { - nhp->gw = nh->gw; /* Router nexthop */ - nhp->flags |= (nh->flags & RNF_ONLINK); - } + /* Router nexthop */ + dest->flags = (dest->flags & RNF_ONLINK); else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK)) - nhp->gw = IPA_NONE; /* PtP link - no need for nexthop */ + dest->gw = IPA_NONE; /* PtP link - no need for nexthop */ else if (ipa_nonzero(he->link)) - nhp->gw = he->link; /* Device nexthop with link-local address known */ + dest->gw = he->link; /* Device nexthop with link-local address known */ else - nhp->gw = he->addr; /* Device nexthop with link-local address unknown */ + dest->gw = he->addr; /* Device nexthop with link-local address unknown */ + + dest = NEXTHOP_NEXT(dest); } - if (skip_nexthop) - if (nhr) - nhr->next = NULL; - else - { - 
a->dest = RTD_UNREACHABLE; - log(L_WARN "No valid nexthop remaining, setting route unreachable"); - goto no_nexthop; - } + /* Fix final length */ + new->ad.length = (void *) dest - (void *) new->ad.data; + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &new->ad)); } -static inline int -rta_next_hop_outdated(rta *a) +static inline struct hostentry_adata * +rta_next_hop_outdated(ea_list *a) { - struct hostentry *he = a->hostentry; + /* First retrieve the hostentry */ + eattr *heea = ea_find(a, &ea_gen_hostentry); + if (!heea) + return NULL; - if (!he) - return 0; + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; - if (!he->src) - return a->dest != RTD_UNREACHABLE; + /* If no nexthop is present, we have to create one */ + eattr *a_nh_ea = ea_find(a, &ea_gen_nexthop); + if (!a_nh_ea) + return head; + + struct nexthop_adata *nhad = (struct nexthop_adata *) a_nh_ea->u.ptr; + + /* Shortcut for unresolvable hostentry */ + if (!head->he->src) + return NEXTHOP_IS_REACHABLE(nhad) ? head : NULL; - return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || - (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); + /* Comparing our nexthop with the hostentry nexthop */ + eattr *he_nh_ea = ea_find(head->he->src, &ea_gen_nexthop); + + return ( + (ea_get_int(a, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN) != head->he->igp_metric) || + (!head->he->nexthop_linkable) || + (!he_nh_ea != !a_nh_ea) || + (he_nh_ea && a_nh_ea && !adata_same(he_nh_ea->u.ptr, a_nh_ea->u.ptr))) + ? head : NULL; } -static inline rte * -rt_next_hop_update_rte(rtable *tab UNUSED, rte *old) +static inline struct rte_storage * +rt_next_hop_update_rte(rtable *tab, net *n, rte *old) { - if (!rta_next_hop_outdated(old->attrs)) + struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); + if (!head) return NULL; - rta *a = alloca(RTA_MAX_SIZE); - memcpy(a, old->attrs, rta_size(old->attrs)); + rte e0 = *old; + rta_apply_hostentry(&e0.attrs, head); - mpls_label_stack mls = { .len = a->nh.labels_orig }; - memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32)); + return rte_store(&e0, n, tab); +} - rta_apply_hostentry(a, old->attrs->hostentry, &mls); - a->aflags = 0; +static inline void +rt_next_hop_resolve_rte(rte *r) +{ + eattr *heea = ea_find(r->attrs, &ea_gen_hostentry); + if (!heea) + return; - rte *e = sl_alloc(rte_slab); - memcpy(e, old, sizeof(rte)); - e->attrs = rta_lookup(a); + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; - return e; + rta_apply_hostentry(&r->attrs, head); } - #ifdef CONFIG_BGP static inline int @@ -2788,23 +3302,23 @@ net_flow_has_dst_prefix(const net_addr *n) } static inline int -rta_as_path_is_empty(rta *a) +rta_as_path_is_empty(ea_list *a) { - eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + eattr *e = ea_find(a, "bgp_path"); return !e || (as_path_getlen(e->u.ptr) == 0); } static inline u32 -rta_get_first_asn(rta *a) +rta_get_first_asn(ea_list *a) { - eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + eattr *e = ea_find(a, "bgp_path"); u32 asn; return (e && as_path_get_first_regular(e->u.ptr, &asn)) ? 
asn : 0; } -int -rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior) +static inline enum flowspec_valid +rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list *a, int interior) { ASSERT(rt_is_ip(tab_ip)); ASSERT(rt_is_flow(tab_flow)); @@ -2812,11 +3326,11 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i /* RFC 8955 6. a) Flowspec has defined dst prefix */ if (!net_flow_has_dst_prefix(n)) - return 0; + return FLOWSPEC_INVALID; /* RFC 9117 4.1. Accept AS_PATH is empty (fr */ if (interior && rta_as_path_is_empty(a)) - return 1; + return FLOWSPEC_VALID; /* RFC 8955 6. b) Flowspec and its best-match route have the same originator */ @@ -2830,33 +3344,36 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i /* Find best-match BGP unicast route for flowspec dst prefix */ net *nb = net_route(tab_ip, &dst); - rte *rb = nb ? nb->routes : NULL; + const rte *rb = nb ? &nb->routes->rte : NULL; /* Register prefix to trie for tracking further changes */ int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? nb->n.addr->pxlen : 0), max_pxlen); /* No best-match BGP route -> no flowspec */ - if (!rb || (rb->attrs->source != RTS_BGP)) - return 0; + if (!rb || (rt_get_source_attr(rb) != RTS_BGP)) + return FLOWSPEC_INVALID; /* Find ORIGINATOR_ID values */ - u32 orig_a = ea_get_int(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0); - u32 orig_b = ea_get_int(rb->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0); + u32 orig_a = ea_get_int(a, "bgp_originator_id", 0); + u32 orig_b = ea_get_int(rb->attrs, "bgp_originator_id", 0); /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */ - if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal(a->from, rb->attrs->from))) - return 0; + if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal( + ea_get_ip(a, &ea_gen_from, IPA_NONE), + ea_get_ip(rb->attrs, &ea_gen_from, IPA_NONE) + ))) + return FLOWSPEC_INVALID; /* Find ASN of the best-match route, for use in next checks */ u32 asn_b = rta_get_first_asn(rb->attrs); if (!asn_b) - return 0; + return FLOWSPEC_INVALID; /* RFC 9117 4.2. For EBGP, flowspec and its best-match route are from the same AS */ if (!interior && (rta_get_first_asn(a) != asn_b)) - return 0; + return FLOWSPEC_INVALID; /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */ TRIE_WALK(tab_ip->trie, subnet, &dst) @@ -2865,99 +3382,139 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i if (!nc) continue; - rte *rc = nc->routes; - if (rc->attrs->source != RTS_BGP) - return 0; + const rte *rc = &nc->routes->rte; + if (rt_get_source_attr(rc) != RTS_BGP) + return FLOWSPEC_INVALID; if (rta_get_first_asn(rc->attrs) != asn_b) - return 0; + return FLOWSPEC_INVALID; } TRIE_WALK_END; - return 1; + return FLOWSPEC_VALID; } #endif /* CONFIG_BGP */ -static rte * -rt_flowspec_update_rte(rtable *tab, rte *r) +static struct rte_storage * +rt_flowspec_update_rte(rtable *tab, net *n, rte *r) { #ifdef CONFIG_BGP - if ((r->attrs->source != RTS_BGP) || !r->u.bgp.base_table) + if (r->generation || (rt_get_source_attr(r) != RTS_BGP)) return NULL; - const net_addr *n = r->net->n.addr; - struct bgp_proto *p = (void *) r->attrs->src->proto; - int valid = rt_flowspec_check(r->u.bgp.base_table, tab, n, r->attrs, p->is_interior); - int dest = valid ? 
RTD_NONE : RTD_UNREACHABLE; - - if (dest == r->attrs->dest) + struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req); + if (!bc->base_table) return NULL; - rta *a = alloca(RTA_MAX_SIZE); - memcpy(a, r->attrs, rta_size(r->attrs)); - a->dest = dest; - a->aflags = 0; + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + + enum flowspec_valid old = rt_get_flowspec_valid(r), + valid = rt_flowspec_check(bc->base_table, tab, n->n.addr, r->attrs, p->is_interior); + + if (old == valid) + return NULL; - rte *new = sl_alloc(rte_slab); - memcpy(new, r, sizeof(rte)); - new->attrs = rta_lookup(a); + rte new = *r; + ea_set_attr_u32(&new.attrs, &ea_gen_flowspec_valid, 0, valid); - return new; + return rte_store(&new, n, tab); #else return NULL; #endif } +static inline void +rt_flowspec_resolve_rte(rte *r, struct channel *c) +{ +#ifdef CONFIG_BGP + enum flowspec_valid valid, old = rt_get_flowspec_valid(r); + struct bgp_channel *bc = (struct bgp_channel *) c; + + if ( (rt_get_source_attr(r) == RTS_BGP) + && (c->channel == &channel_bgp) + && (bc->base_table)) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + valid = rt_flowspec_check( + bc->base_table, + c->in_req.hook->table, + r->net, r->attrs, p->is_interior); + } + else + valid = FLOWSPEC_UNKNOWN; + + if (valid == old) + return; + + if (valid == FLOWSPEC_UNKNOWN) + ea_unset_attr(&r->attrs, 0, &ea_gen_flowspec_valid); + else + ea_set_attr_u32(&r->attrs, &ea_gen_flowspec_valid, 0, valid); +#endif +} static inline int rt_next_hop_update_net(rtable *tab, net *n) { - rte **k, *e, *new, *old_best, **new_best; + struct rte_storage *new; int count = 0; - int free_old_best = 0; + int is_flow = net_is_flow(n->n.addr); - old_best = n->routes; + struct rte_storage *old_best = n->routes; if (!old_best) return 0; - for (k = &n->routes; e = *k; k = &e->next) - { - if (!net_is_flow(n->n.addr)) - new = rt_next_hop_update_rte(tab, e); - else - new = rt_flowspec_update_rte(tab, e); + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) + count++; - if (new) + if (!count) + return 0; + + struct rte_multiupdate { + struct rte_storage *old, *new; + } *updates = alloca(sizeof(struct rte_multiupdate) * count); + + int pos = 0; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) { - *k = new; + struct rte_storage *new = is_flow + ? 
rt_flowspec_update_rte(tab, n, &e->rte) + : rt_next_hop_update_rte(tab, n, &e->rte); - rte_trace_in(D_ROUTES, new->sender, new, "updated"); - rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL); + if (!new) + continue; /* Call a pre-comparison hook */ /* Not really an efficient way to compute this */ - if (e->attrs->src->proto->rte_recalculate) - e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL); - - if (e != old_best) - rte_free_quick(e); - else /* Freeing of the old best rte is postponed */ - free_old_best = 1; - - e = new; - count++; + if (e->rte.src->owner->rte_recalculate) + e->rte.src->owner->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); + + updates[pos++] = (struct rte_multiupdate) { + .old = e, + .new = new, + }; + + /* Replace the route in the list */ + new->next = e->next; + *k = e = new; + + /* Get a new ID for the route */ + new->rte.lastmod = current_time(); + new->rte.id = hmap_first_zero(&tab->id_map); + hmap_set(&tab->id_map, new->rte.id); } - } - if (!count) - return 0; + ASSERT_DIE(pos <= count); + count = pos; /* Find the new best route */ - new_best = NULL; - for (k = &n->routes; e = *k; k = &e->next) + struct rte_storage **new_best = NULL; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) { - if (!new_best || rte_better(e, *new_best)) + if (!new_best || rte_better(&e->rte, &(*new_best)->rte)) new_best = k; } @@ -2970,15 +3527,17 @@ rt_next_hop_update_net(rtable *tab, net *n) n->routes = new; } - /* Announce the new best route */ - if (new != old_best) - rte_trace_in(D_ROUTES, new->sender, new, "updated [best]"); - - /* Propagate changes */ - rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best); - - if (free_old_best) - rte_free_quick(old_best); + /* Announce the changes */ + for (int i=0; i<count; i++) + { + _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old); + const char *best_indicator[2][2] = { + { "autoupdated", "autoupdated [-best]" }, + { "autoupdated [+best]", "autoupdated [best]" } + }; + rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); + rte_announce(tab, n, updates[i].new, updates[i].old, new, old_best); + } return count; } @@ -3042,6 +3601,8 @@ rt_new_table(struct symbol *s, uint addr_type) c->gc_period = (uint) -1; /* set in rt_postconfig() */ c->min_settle_time = 1 S; c->max_settle_time = 20 S; + c->cork_threshold.low = 128; + c->cork_threshold.high = 512; add_tail(&new_config->tables, &c->n); @@ -3087,6 +3648,36 @@ rt_unlock_table(rtable *r) } } +static void +rt_check_cork_low(rtable *tab) +{ + if (!tab->cork_active) + return; + + if (!tab->exporter.first || (tab->exporter.first->seq + tab->cork_threshold.low > tab->exporter.next_seq)) + { + tab->cork_active = 0; + rt_cork_release(); + + if (config->table_debug) + log(L_TRACE "%s: Uncorked", tab->name); + } +} + +static void +rt_check_cork_high(rtable *tab) +{ + if (!tab->cork_active && tab->exporter.first && (tab->exporter.first->seq + tab->cork_threshold.high <= tab->exporter.next_seq)) + { + tab->cork_active = 1; + rt_cork_acquire(); + + if (config->table_debug) + log(L_TRACE "%s: Corked", tab->name); + } +} + + static int rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) { @@ -3100,6 +3691,14 @@ rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old tab->name = new->name; tab->config = new; + tab->cork_threshold = new->cork_threshold; + + if (new->cork_threshold.high != old->cork_threshold.high) + 
rt_check_cork_high(tab); + + if (new->cork_threshold.low != old->cork_threshold.low) + rt_check_cork_low(tab); + return 1; } @@ -3158,21 +3757,18 @@ rt_commit(struct config *new, struct config *old) DBG("\tdone\n"); } -static inline void -do_feed_channel(struct channel *c, net *n, rte *e) +static void +rt_feed_done(struct rt_export_hook *c) { - rte_update_lock(); - if (c->ra_mode == RA_ACCEPTED) - rt_notify_accepted(c, n, NULL, NULL, c->refeeding); - else if (c->ra_mode == RA_MERGED) - rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding); - else /* RA_BASIC */ - rt_notify_basic(c, n, e, e, c->refeeding); - rte_update_unlock(); + c->event->hook = rt_export_hook; + + rt_set_export_state(c, TES_READY); + + rt_send_export_event(c); } /** - * rt_feed_channel - advertise all routes to a channel + * rt_feed_by_fib - advertise all routes to a channel by walking a fib * @c: channel to be fed * * This function performs one pass of advertisement of routes to a channel that @@ -3180,370 +3776,154 @@ do_feed_channel(struct channel *c, net *n, rte *e) * has something to do. (We avoid transferring all the routes in single pass in * order not to monopolize CPU time.) */ -int -rt_feed_channel(struct channel *c) +static void +rt_feed_by_fib(void *data) { + struct rt_export_hook *c = data; + struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - ASSERT(c->export_state == ES_FEEDING); + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - if (!c->feed_active) - { - FIB_ITERATE_INIT(fit, &c->table->fib); - c->feed_active = 1; - } + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - FIB_ITERATE_START(&c->table->fib, fit, net, n) + FIB_ITERATE_START(&tab->fib, fit, net, n) { - rte *e = n->routes; if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - return 0; + rt_send_export_event(c); + return; } - if ((c->ra_mode == RA_OPTIMAL) || - (c->ra_mode == RA_ACCEPTED) || - (c->ra_mode == RA_MERGED)) - if (rte_is_valid(e)) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - do_feed_channel(c, n, e); - max_feed--; - } - - if (c->ra_mode == RA_ANY) - for(e = n->routes; e; e = e->next) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - if (!rte_is_valid(e)) - continue; + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + return; - do_feed_channel(c, n, e); - max_feed--; - } + if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr)) + max_feed -= rt_feed_net(c, n); } FIB_ITERATE_END; -done: - c->feed_active = 0; - return 1; + rt_feed_done(c); } -/** - * rt_feed_baby_abort - abort protocol feeding - * @c: channel - * - * This function is called by the protocol code when the protocol stops or - * ceases to exist during the feeding. 
- */ -void -rt_feed_channel_abort(struct channel *c) +static void +rt_feed_by_trie(void *data) { - if (c->feed_active) - { - /* Unlink the iterator */ - fit_get(&c->table->fib, &c->feed_fit); - c->feed_active = 0; - } -} + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + ASSERT_DIE(c->walk_state); + struct f_trie_walk_state *ws = c->walk_state; -/* - * Import table - */ + int max_feed = 256; -int -rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ - struct rtable *tab = c->in_table; - rte *old, **pos; - net *net; + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - if (new) + net_addr addr; + while (trie_walk_next(ws, &addr)) { - net = net_get(tab, n); - - if (!new->pref) - new->pref = c->preference; + net *n = net_find(tab, &addr); + if (!n) + continue; - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - } - else - { - net = net_find(tab, n); + if ((max_feed -= rt_feed_net(c, n)) <= 0) + return; - if (!net) - goto drop_withdraw; + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + return; } - /* Find the old rte */ - for (pos = &net->routes; old = *pos; pos = &old->next) - if (old->attrs->src == src) - { - if (new && rte_same(old, new)) - { - /* Refresh the old rte, continue with update to main rtable */ - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) - { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } - - goto drop_update; - } - - /* Move iterator if needed */ - if (old == c->reload_next_rte) - c->reload_next_rte = old->next; - - /* Remove the old rte */ - *pos = old->next; - rte_free_quick(old); - tab->rt_count--; - - break; - } - - if (!new) - { - if (!old) - goto drop_withdraw; + rt_unlock_trie(tab, c->walk_lock); + c->walk_lock = NULL; - if (!net->routes) - fib_delete(&tab->fib, net); + mb_free(c->walk_state); + c->walk_state = NULL; - return 1; - } + rt_feed_done(c); +} - struct channel_limit *l = &c->rx_limit; - if (l->action && !old) - { - if (tab->rt_count >= l->limit) - channel_notify_limit(c, l, PLD_RX, tab->rt_count); +static void +rt_feed_equal(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - if (l->state == PLS_BLOCKED) - { - /* Required by rte_trace_in() */ - new->net = net; + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL); - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } + net *n = net_find(tab, c->req->addr); + if (n) + rt_feed_net(c, n); - /* Insert the new rte */ - rte *e = rte_do_cow(new); - e->flags |= REF_COW; - e->net = net; - e->sender = c; - e->lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; + rt_feed_done(c); +} -drop_update: - c->stats.imp_updates_received++; - c->stats.imp_updates_ignored++; - rte_free(new); +static void +rt_feed_for(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - if (!net->routes) - fib_delete(&tab->fib, net); + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR); - return 0; + net *n = net_route(tab, c->req->addr); + if (n) + rt_feed_net(c, n); -drop_withdraw: - c->stats.imp_withdraws_received++; - c->stats.imp_withdraws_ignored++; - return 0; + rt_feed_done(c); } -int 
-rt_reload_channel(struct channel *c) +static uint +rt_feed_net(struct rt_export_hook *c, net *n) { - struct rtable *tab = c->in_table; - struct fib_iterator *fit = &c->reload_fit; - int max_feed = 64; - - ASSERT(c->channel_state == CS_UP); + uint count = 0; - if (!c->reload_active) + if (c->req->export_bulk) { - FIB_ITERATE_INIT(fit, &tab->fib); - c->reload_active = 1; - } - - do { - for (rte *e = c->reload_next_rte; e; e = e->next) - { - if (max_feed-- <= 0) - { - c->reload_next_rte = e; - debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed); - return 0; - } - - rte_update2(c, e->net->n.addr, rte_do_cow(e), e->attrs->src); - } - - c->reload_next_rte = NULL; - - FIB_ITERATE_START(&tab->fib, fit, net, n) + count = rte_feed_count(n); + if (count) { - if (c->reload_next_rte = n->routes) - { - FIB_ITERATE_PUT_NEXT(fit, &tab->fib); - break; - } + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); } - FIB_ITERATE_END; } - while (c->reload_next_rte); - c->reload_active = 0; - return 1; -} - -void -rt_reload_channel_abort(struct channel *c) -{ - if (c->reload_active) + else if (n->routes) { - /* Unlink the iterator */ - fit_get(&c->in_table->fib, &c->reload_fit); - c->reload_next_rte = NULL; - c->reload_active = 0; + struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; + c->req->export_one(c->req, n->n.addr, &rpe); + count = 1; } -} - -void -rt_prune_sync(rtable *t, int all) -{ - struct fib_iterator fit; - FIB_ITERATE_INIT(&fit, &t->fib); + for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL)) + rpe_mark_seen(c, rpe); -again: - FIB_ITERATE_START(&t->fib, &fit, net, n) - { - rte *e, **ee = &n->routes; - - while (e = *ee) - { - if (all || (e->flags & (REF_STALE | REF_DISCARD))) - { - *ee = e->next; - rte_free_quick(e); - t->rt_count--; - } - else - ee = &e->next; - } - - if (all || !n->routes) - { - FIB_ITERATE_PUT(&fit); - fib_delete(&t->fib, n); - goto again; - } - } - FIB_ITERATE_END; + return count; } - /* - * Export table + * Import table */ -int -rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed) +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - struct rtable *tab = c->out_table; - struct rte_src *src; - rte *old, **pos; - net *net; - - if (new) - { - net = net_get(tab, n); - src = new->attrs->src; - - rte_store_tmp_attrs(new, rte_update_pool, NULL); - - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - } - else - { - net = net_find(tab, n); - src = old0->attrs->src; + struct channel *c = SKIP_BACK(struct channel, reload_req, req); - if (!net) - goto drop_withdraw; - } - - /* Find the old rte */ - for (pos = &net->routes; old = *pos; pos = &old->next) - if ((c->ra_mode != RA_ANY) || (old->attrs->src == src)) + for (uint i=0; i<count; i++) + if (feed[i]->sender == c->in_req.hook) { - if (new && rte_same(old, new)) - { - /* REF_STALE / REF_DISCARD not used in export table */ - /* - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) - { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } - */ - - goto drop_update; - } - - /* Remove the old rte */ - *pos = old->next; - rte_free_quick(old); - tab->rt_count--; + /* Strip the later attribute layers */ + rte new = *feed[i]; + while (new.attrs->next) + new.attrs = new.attrs->next; - break; + /* And reload the 
route */ + rte_update(c, net, &new, new.src); } - - if (!new) - { - if (!old) - goto drop_withdraw; - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } - - /* Insert the new rte */ - rte *e = rte_do_cow(new); - e->flags |= REF_COW; - e->net = net; - e->sender = c; - e->lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; - -drop_update: - return refeed; - -drop_withdraw: - return 0; } @@ -3642,7 +4022,7 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) rem_node(&he->ln); hc_remove(hc, he); - sl_free(hc->slab, he); + sl_free(he); hc->hash_items--; if (hc->hash_items < hc->hash_min) @@ -3659,7 +4039,7 @@ rt_init_hostcache(rtable *tab) hc_alloc_table(hc, tab->rp, HC_DEF_ORDER); hc->slab = sl_new(tab->rp, sizeof(struct hostentry)); - hc->lp = lp_new(tab->rp, LP_GOOD_SIZE(1024)); + hc->lp = lp_new(tab->rp); hc->trie = f_new_trie(hc->lp, 0); tab->hostcache = hc; @@ -3711,56 +4091,31 @@ if_local_addr(ip_addr a, struct iface *i) } u32 -rt_get_igp_metric(rte *rt) +rt_get_igp_metric(const rte *rt) { - eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC); + eattr *ea = ea_find(rt->attrs, "igp_metric"); if (ea) return ea->u.data; - rta *a = rt->attrs; - -#ifdef CONFIG_OSPF - if ((a->source == RTS_OSPF) || - (a->source == RTS_OSPF_IA) || - (a->source == RTS_OSPF_EXT1)) - return rt->u.ospf.metric1; -#endif - -#ifdef CONFIG_RIP - if (a->source == RTS_RIP) - return rt->u.rip.metric; -#endif - -#ifdef CONFIG_BGP - if (a->source == RTS_BGP) - { - u64 metric = bgp_total_aigp_metric(rt); - return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN); - } -#endif - -#ifdef CONFIG_BABEL - if (a->source == RTS_BABEL) - return rt->u.babel.metric; -#endif - - if (a->source == RTS_DEVICE) + if (rt_get_source_attr(rt) == RTS_DEVICE) return 0; + if (rt->src->owner->class->rte_igp_metric) + return rt->src->owner->class->rte_igp_metric(rt); + return IGP_METRIC_UNKNOWN; } static int rt_update_hostentry(rtable *tab, struct hostentry *he) { - rta *old_src = he->src; + ea_list *old_src = he->src; int direct = 0; int pxlen = 0; /* Reset the hostentry */ he->src = NULL; - he->dest = RTD_UNREACHABLE; he->nexthop_linkable = 0; he->igp_metric = 0; @@ -3769,11 +4124,14 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) net *n = net_route(tab, &he_addr); if (n) { - rte *e = n->routes; - rta *a = e->attrs; - pxlen = n->n.addr->pxlen; - - if (a->hostentry) + struct rte_storage *e = n->routes; + ea_list *a = e->rte.attrs; + u32 pref = rt_get_preference(&e->rte); + + for (struct rte_storage *ee = n->routes; ee; ee = ee->next) + if (rte_is_valid(&ee->rte) && + (rt_get_preference(&ee->rte) >= pref) && + ea_find(ee->rte.attrs, &ea_gen_hostentry)) { /* Recursive route should not depend on another recursive route */ log(L_WARN "Next hop address %I resolvable through recursive route for %N", @@ -3781,9 +4139,14 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) goto done; } - if (a->dest == RTD_UNICAST) - { - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + pxlen = n->n.addr->pxlen; + + eattr *nhea = ea_find(a, &ea_gen_nexthop); + ASSERT_DIE(nhea); + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + + if (NEXTHOP_IS_REACHABLE(nhad)) + NEXTHOP_WALK(nh, nhad) if (ipa_zero(nh->gw)) { if (if_local_addr(he->addr, nh->iface)) @@ -3796,12 +4159,10 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) direct++; } - } he->src = rta_clone(a); - he->dest = a->dest; he->nexthop_linkable = !direct; - he->igp_metric = rt_get_igp_metric(e); + 
he->igp_metric = rt_get_igp_metric(&e->rte); } done: @@ -3839,7 +4200,7 @@ rt_update_hostcache(rtable *tab) tab->hcu_scheduled = 0; } -struct hostentry * +static struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; |
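
For orientation, the corking introduced by this patch is driven purely by the size of the export journal: rt_check_cork_high() corks the table once the distance between the oldest pending export (exporter.first->seq) and exporter.next_seq reaches cork_threshold.high, and rt_check_cork_low() releases it only after that distance drops back under cork_threshold.low (128 and 512 by default, as set in rt_new_table() above). The following is a minimal sketch of that backlog computation, not part of the patch; it only reuses the rt_exporter fields visible in the hunks above, and the helper name is made up for illustration:

/* Hypothetical helper, for illustration only: number of journal entries
 * between the oldest still-pending export and the next sequence number.
 * rt_check_cork_high() corks when this reaches cork_threshold.high,
 * rt_check_cork_low() uncorks when it falls below cork_threshold.low. */
static inline u64
rt_export_backlog(const struct rt_exporter *e)
{
  /* An empty journal has no first pending export */
  return e->first ? (e->next_seq - e->first->seq) : 0;
}

Keeping the release threshold well below the acquire threshold gives the cork a hysteresis band, so a table does not oscillate between corked and uncorked states when a slow export hook drains the journal at roughly the same rate new updates arrive.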