diff options
author | Maria Matejka <mq@ucw.cz> | 2022-10-04 16:09:41 +0200 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2022-10-04 16:09:41 +0200 |
commit | f69ba3921a1842f9cac9b9fbd0a32800615da02e (patch) | |
tree | 25d3b5b4ef7ffc4fb9937d6f48806934f4da2b62 /nest | |
parent | a414ba6b975a1187a59cac1f58bc68e5e4b7d37d (diff) | |
parent | fb7fb6744582b2bb74b3b1e32696bd5534e93054 (diff) |
Merge commit 'fb7fb674' into HEAD
Diffstat (limited to 'nest')
-rw-r--r-- | nest/config.Y | 17 | ||||
-rw-r--r-- | nest/proto.c | 144 | ||||
-rw-r--r-- | nest/protocol.h | 15 | ||||
-rw-r--r-- | nest/rt-show.c | 17 | ||||
-rw-r--r-- | nest/rt-table.c | 1696 | ||||
-rw-r--r-- | nest/rt.h | 264 |
6 files changed, 1337 insertions, 816 deletions
diff --git a/nest/config.Y b/nest/config.Y index 91147a29..84c76ae9 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -227,14 +227,13 @@ table_opt: cf_error("Trie option not supported for %s table", net_label[this_table->addr_type]); this_table->trie_used = $2; } - | MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; } - | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; } | GC THRESHOLD expr { this_table->gc_threshold = $3; } | GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); } | CORK THRESHOLD expr expr { if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold."); this_table->cork_threshold.low = $3; this_table->cork_threshold.high = $4; } + | DEBUG bool { this_table->debug = $2; } ; table_opts: @@ -322,6 +321,8 @@ channel_item_: | RECEIVE LIMIT limit_spec { this_channel->rx_limit = $3; } | IMPORT LIMIT limit_spec { this_channel->in_limit = $3; } | EXPORT LIMIT limit_spec { this_channel->out_limit = $3; } + | MIN SETTLE TIME expr_us { this_channel->min_settle_time = $4; } + | MAX SETTLE TIME expr_us { this_channel->max_settle_time = $4; } | PREFERENCE expr { this_channel->preference = $2; check_u16($2); } | IMPORT KEEP FILTERED bool { if ($4) @@ -361,7 +362,11 @@ channel_end: proto_channel: channel_start channel_opt_list channel_end; -rtable: CF_SYM_KNOWN { cf_assert_symbol($1, SYM_TABLE); $$ = $1->table; } ; +rtable: CF_SYM_KNOWN { + cf_assert_symbol($1, SYM_TABLE); + if (!$1->table) rt_new_default_table($1); + $$ = $1->table; +} ; imexport: FILTER filter { $$ = $2; } @@ -390,7 +395,7 @@ debug_default: DEBUG PROTOCOLS debug_mask { new_config->proto_default_debug = $3; } | DEBUG CHANNELS debug_mask { new_config->channel_default_debug = $3; } | DEBUG COMMANDS expr { new_config->cli_debug = $3; } - | DEBUG TABLES bool { new_config->table_debug = $3; } + | DEBUG TABLES debug_mask { new_config->table_debug = $3; } ; /* MRTDUMP PROTOCOLS is in systep/unix/config.Y */ @@ -683,6 +688,7 @@ r_args: } | r_args TABLE symbol_known { cf_assert_symbol($3, SYM_TABLE); + if (!$3->table) cf_error("Table %s not configured", $3->name); $$ = $1; rt_show_add_table($$, $3->table->table); $$->tables_defined_by = RSD_TDB_DIRECT; @@ -696,7 +702,8 @@ r_args: } | r_args IMPORT TABLE channel_arg { if (!($4->in_keep & RIK_PREFILTER)) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_exporter($$, &$4->table->exporter, "import")->prefilter = $4; + RT_LOCKED($4->table, tab) + rt_show_add_exporter($$, &tab->exporter.e, "import")->prefilter = $4; $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args EXPORT TABLE channel_arg { diff --git a/nest/proto.c b/nest/proto.c index 853b1cf9..783a936c 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -55,6 +55,7 @@ static void channel_update_limit(struct channel *c, struct limit *l, int dir, st static void channel_reset_limit(struct channel *c, struct limit *l, int dir); static void channel_feed_end(struct channel *c); static void channel_export_stopped(struct rt_export_request *req); +static void channel_check_stopped(struct channel *c); static inline int proto_is_done(struct proto *p) { return (p->proto_state == PS_DOWN) && proto_is_inactive(p); } @@ -168,7 +169,7 @@ proto_cf_find_channel(struct proto_config *pc, uint net_type) * Returns pointer to channel or NULL */ struct channel * -proto_find_channel_by_table(struct proto *p, struct rtable *t) +proto_find_channel_by_table(struct proto *p, rtable *t) { struct channel *c; @@ -312,10 +313,19 @@ proto_remove_channels(struct proto *p) proto_remove_channel(p, c); } +struct roa_subscription { + node roa_node; + timer t; + btime base_settle_time; /* Start of settling interval */ + struct channel *c; + struct rt_export_request req; +}; + static void -channel_roa_in_changed(void *_data) +channel_roa_in_changed(struct timer *t) { - struct channel *c = _data; + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, t, t); + struct channel *c = s->c; int active = !!c->reload_req.hook; CD(c, "Reload triggered by RPKI change%s", active ? " - already active" : ""); @@ -327,9 +337,11 @@ channel_roa_in_changed(void *_data) } static void -channel_roa_out_changed(void *_data) +channel_roa_out_changed(struct timer *t) { - struct channel *c = _data; + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, t, t); + struct channel *c = s->c; + CD(c, "Feeding triggered by RPKI change"); c->refeed_pending = 1; @@ -338,29 +350,57 @@ channel_roa_out_changed(void *_data) rt_stop_export(&c->out_req, channel_export_stopped); } -/* Temporary code, subscriptions should be changed to resources */ -struct roa_subscription { - struct rt_subscription s; - node roa_node; -}; +static void +channel_export_one_roa(struct rt_export_request *req, const net_addr *net UNUSED, struct rt_pending_export *first) +{ + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, req, req); + + /* TODO: use the information about what roa has changed */ + + if (!tm_active(&s->t)) + { + s->base_settle_time = current_time(); + tm_start(&s->t, s->base_settle_time + s->c->min_settle_time); + } + else + tm_set(&s->t, + MIN(s->base_settle_time + s->c->max_settle_time, + current_time() + s->c->min_settle_time)); + + + rpe_mark_seen_all(req->hook, first, NULL); +} + +static void +channel_dump_roa_req(struct rt_export_request *req) +{ + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, req, req); + struct channel *c = s->c; + struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter.e, req->hook->table); + + debug(" Channel %s.%s ROA %s change notifier from table %s request %p\n", + c->proto->name, c->name, + (s->t.hook == channel_roa_in_changed) ? "import" : "export", + tab->name, req); +} static int channel_roa_is_subscribed(struct channel *c, rtable *tab, int dir) { - void (*hook)(void *) = + void (*hook)(struct timer *) = dir ? channel_roa_in_changed : channel_roa_out_changed; struct roa_subscription *s; node *n; WALK_LIST2(s, n, c->roa_subscriptions, roa_node) - if ((s->s.tab == tab) && (s->s.event->hook == hook)) + if ((tab == SKIP_BACK(rtable, priv.exporter.e, s->req.hook->table)) + && (s->t.hook == hook)) return 1; return 0; } - static void channel_roa_subscribe(struct channel *c, rtable *tab, int dir) { @@ -368,28 +408,47 @@ channel_roa_subscribe(struct channel *c, rtable *tab, int dir) return; struct roa_subscription *s = mb_allocz(c->proto->pool, sizeof(struct roa_subscription)); - s->s.event = ev_new_init(c->proto->pool, dir ? channel_roa_in_changed : channel_roa_out_changed, c); - s->s.list = proto_work_list(c->proto); - rt_subscribe(tab, &s->s); + *s = (struct roa_subscription) { + .t = { .hook = dir ? channel_roa_in_changed : channel_roa_out_changed, }, + .c = c, + .req = { + .name = mb_sprintf(c->proto->pool, "%s.%s.roa-%s.%s", + c->proto->name, c->name, dir ? "in" : "out", tab->name), + .list = proto_work_list(c->proto), + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_roa_req, + .export_one = channel_export_one_roa, + }, + }; add_tail(&c->roa_subscriptions, &s->roa_node); + rt_request_export(tab, &s->req); } static void -channel_roa_unsubscribe(struct roa_subscription *s) +channel_roa_unsubscribed(struct rt_export_request *req) { - rt_unsubscribe(&s->s); + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, req, req); + struct channel *c = s->c; + rem_node(&s->roa_node); - rfree(s->s.event); mb_free(s); + + channel_check_stopped(c); +} + +static void +channel_roa_unsubscribe(struct roa_subscription *s) +{ + rt_stop_export(&s->req, channel_roa_unsubscribed); } static void channel_roa_subscribe_filter(struct channel *c, int dir) { const struct filter *f = dir ? c->in_filter : c->out_filter; - struct rtable *tab; + rtable *tab; int valid = 1, found = 0; if ((f == FILTER_ACCEPT) || (f == FILTER_REJECT)) @@ -450,13 +509,10 @@ channel_start_import(struct channel *c) return; } - int nlen = strlen(c->name) + strlen(c->proto->name) + 2; - char *rn = mb_allocz(c->proto->pool, nlen); - bsprintf(rn, "%s.%s", c->proto->name, c->name); - c->in_req = (struct rt_import_request) { - .name = rn, + .name = mb_sprintf(c->proto->pool, "%s.%s", c->proto->name, c->name), .trace_routes = c->debug | c->proto->debug, + .list = proto_work_list(c->proto), .dump_req = channel_dump_import_req, .log_state_change = channel_import_log_state_change, .preimport = channel_preimport, @@ -483,12 +539,9 @@ channel_start_export(struct channel *c) } ASSERT(c->channel_state == CS_UP); - int nlen = strlen(c->name) + strlen(c->proto->name) + 2; - char *rn = mb_allocz(c->proto->pool, nlen); - bsprintf(rn, "%s.%s", c->proto->name, c->name); c->out_req = (struct rt_export_request) { - .name = rn, + .name = mb_sprintf(c->proto->pool, "%s.%s", c->proto->name, c->name), .list = proto_work_list(c->proto), .addr = c->out_subprefix, .addr_mode = c->out_subprefix ? TE_ADDR_IN : TE_ADDR_NONE, @@ -523,7 +576,7 @@ channel_start_export(struct channel *c) } DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req); - rt_request_export(&c->table->exporter, &c->out_req); + rt_request_export(c->table, &c->out_req); } static void @@ -532,7 +585,7 @@ channel_check_stopped(struct channel *c) switch (c->channel_state) { case CS_STOP: - if (c->out_req.hook || c->in_req.hook) + if (!EMPTY_LIST(c->roa_subscriptions) || c->out_req.hook || c->in_req.hook) return; channel_set_state(c, CS_DOWN); @@ -540,7 +593,7 @@ channel_check_stopped(struct channel *c) break; case CS_PAUSE: - if (c->out_req.hook) + if (!EMPTY_LIST(c->roa_subscriptions) || c->out_req.hook) return; channel_set_state(c, CS_START); @@ -557,8 +610,6 @@ channel_import_stopped(struct rt_import_request *req) { struct channel *c = SKIP_BACK(struct channel, in_req, req); - req->hook = NULL; - mb_free(c->in_req.name); c->in_req.name = NULL; @@ -577,7 +628,7 @@ channel_export_stopped(struct rt_export_request *req) { c->refeeding = 1; c->refeed_pending = 0; - rt_request_export(&c->table->exporter, req); + rt_request_export(c->table, req); return; } @@ -621,7 +672,7 @@ channel_schedule_reload(struct channel *c) { ASSERT(c->in_req.hook); - rt_request_export(&c->table->exporter, &c->reload_req); + rt_request_export(c->table, &c->reload_req); } static void @@ -864,7 +915,7 @@ channel_config_new(const struct channel_class *cc, const char *name, uint net_ty if (proto->net_type && (net_type != proto->net_type)) cf_error("Different channel type"); - tab = new_config->def_tables[net_type]; + tab = rt_get_default_table(new_config, net_type); } if (!cc) @@ -883,6 +934,9 @@ channel_config_new(const struct channel_class *cc, const char *name, uint net_ty cf->debug = new_config->channel_default_debug; cf->rpki_reload = 1; + cf->min_settle_time = 1 S; + cf->max_settle_time = 20 S; + add_tail(&proto->channels, &cf->n); return cf; @@ -963,6 +1017,22 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; c->rpki_reload = cf->rpki_reload; + if ( (c->min_settle_time != cf->min_settle_time) + || (c->max_settle_time != cf->max_settle_time)) + { + c->min_settle_time = cf->min_settle_time; + c->max_settle_time = cf->max_settle_time; + + struct roa_subscription *s; + node *n; + + WALK_LIST2(s, n, c->roa_subscriptions, roa_node) + if (tm_active(&s->t)) + tm_set(&s->t, + MIN(s->base_settle_time + c->max_settle_time, + current_time() + c->min_settle_time)); + } + /* Execute channel-specific reconfigure hook */ if (c->channel->reconfigure && !c->channel->reconfigure(c, cf, &import_changed, &export_changed)) return 0; diff --git a/nest/protocol.h b/nest/protocol.h index b4730126..c88598cc 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -18,7 +18,6 @@ struct iface; struct ifa; -struct rtable; struct rte; struct neighbor; struct rta; @@ -187,7 +186,7 @@ struct proto { * rte_remove Called whenever a rte is removed from the routing table. */ - int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); + int (*rte_recalculate)(struct rtable_private *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_mergable)(struct rte *, struct rte *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); @@ -460,6 +459,9 @@ struct channel_config { struct channel_limit in_limit; /* Limit for importing routes from protocol */ struct channel_limit out_limit; /* Limit for exporting routes to protocol */ + btime min_settle_time; /* Minimum settle time for ROA-induced reload */ + btime max_settle_time; /* Maximum settle time for ROA-induced reload */ + u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ u8 ra_mode; /* Mode of received route advertisements (RA_*) */ u16 preference; /* Default route preference */ @@ -476,7 +478,7 @@ struct channel { const struct channel_class *channel; struct proto *proto; - struct rtable *table; + rtable *table; const struct filter *in_filter; /* Input filter */ const struct filter *out_filter; /* Output filter */ const net_addr *out_subprefix; /* Export only subprefixes of this net */ @@ -487,6 +489,9 @@ struct channel { struct limit in_limit; /* Input limit */ struct limit out_limit; /* Output limit */ + btime min_settle_time; /* Minimum settle time for ROA-induced reload */ + btime max_settle_time; /* Maximum settle time for ROA-induced reload */ + u8 limit_actions[PLD_MAX]; /* Limit actions enum */ u8 limit_active; /* Flags for active limits */ @@ -540,7 +545,7 @@ struct channel { struct rt_exporter *out_table; /* Internal table for exported routes */ - list roa_subscriptions; /* List of active ROA table subscriptions based on filters roa_check() */ + list roa_subscriptions; /* List of active ROA table subscriptions based on filters' roa_check() calls */ }; #define RIK_REJECTED 1 /* Routes rejected in import filter are kept */ @@ -604,7 +609,7 @@ struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_ty static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) { return proto_cf_find_channel(pc, pc->net_type); } -struct channel *proto_find_channel_by_table(struct proto *p, struct rtable *t); +struct channel *proto_find_channel_by_table(struct proto *p, rtable *t); struct channel *proto_find_channel_by_name(struct proto *p, const char *n); struct channel *proto_add_channel(struct proto *p, struct channel_config *cf); int proto_configure_channel(struct proto *p, struct channel **c, struct channel_config *cf); diff --git a/nest/rt-show.c b/nest/rt-show.c index 17400029..dc88047a 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -301,7 +301,7 @@ rt_show_cont(struct rt_show_data *d) if (d->tables_defined_by & RSD_TDB_SET) rt_show_table(d); - rt_request_export(d->tab->table, &d->req); + rt_request_export_other(d->tab->table, &d->req); } static void @@ -354,9 +354,11 @@ rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char * } struct rt_show_data_rtable * -rt_show_add_table(struct rt_show_data *d, struct rtable *t) +rt_show_add_table(struct rt_show_data *d, rtable *t) { - struct rt_show_data_rtable *rsdr = rt_show_add_exporter(d, &t->exporter, t->name); + struct rt_show_data_rtable *rsdr; + RT_LOCKED(t, tp) + rsdr = rt_show_add_exporter(d, &tp->exporter.e, t->name); struct proto_config *krt = t->config->krt_attached; if (krt) @@ -400,8 +402,8 @@ rt_show_get_default_tables(struct rt_show_data *d) } for (int i=1; i<NET_MAX; i++) - if (config->def_tables[i] && config->def_tables[i]->table) - rt_show_add_table(d, config->def_tables[i]->table); + if (config->def_tables[i] && config->def_tables[i]->table && config->def_tables[i]->table->table) + rt_show_add_table(d, config->def_tables[i]->table->table); } static inline void @@ -418,12 +420,13 @@ rt_show_prepare_tables(struct rt_show_data *d) /* Ensure there is defined export_channel for each table */ if (d->export_mode) { + rtable *rt = SKIP_BACK(rtable, priv.exporter.e, tab->table); if (!tab->export_channel && d->export_channel && - (tab->table == &d->export_channel->table->exporter)) + (rt == d->export_channel->table)) tab->export_channel = d->export_channel; if (!tab->export_channel && d->export_protocol) - tab->export_channel = proto_find_channel_by_table(d->export_protocol, SKIP_BACK(rtable, exporter, tab->table)); + tab->export_channel = proto_find_channel_by_table(d->export_protocol, rt); if (!tab->export_channel) { diff --git a/nest/rt-table.c b/nest/rt-table.c index 3ade4237..95248635 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -43,10 +43,10 @@ * all prefixes that may influence resolving of tracked next hops. * * When a best route changes in the src table, the hostcache is notified using - * rt_notify_hostcache(), which immediately checks using the trie whether the + * an auxiliary export request, which checks using the trie whether the * change is relevant and if it is, then it schedules asynchronous hostcache * recomputation. The recomputation is done by rt_update_hostcache() (called - * from rt_event() of src table), it walks through all hostentries and resolves + * as an event of src table), it walks through all hostentries and resolves * them (by rt_update_hostentry()). It also updates the trie. If a change in * hostentry resolution was found, then it schedules asynchronous nexthop * recomputation of associated dst table. That is done by rt_next_hop_update() @@ -60,15 +60,14 @@ * routes depends of resolving their network prefixes in IP routing tables. This * is similar to the recursive next hop mechanism, but simpler as there are no * intermediate hostcache and hostentries (because flows are less likely to - * share common net prefix than routes sharing a common next hop). In src table, - * there is a list of dst tables (list flowspec_links), this list is updated by - * flowpsec channels (by rt_flowspec_link() and rt_flowspec_unlink() during - * channel start/stop). Each dst table has its own trie of prefixes that may - * influence validation of flowspec routes in it (flowspec_trie). + * share common net prefix than routes sharing a common next hop). Every dst + * table has its own export request in every src table. Each dst table has its + * own trie of prefixes that may influence validation of flowspec routes in it + * (flowspec_trie). * - * When a best route changes in the src table, rt_flowspec_notify() immediately - * checks all dst tables from the list using their tries to see whether the - * change is relevant for them. If it is, then an asynchronous re-validation of + * When a best route changes in the src table, the notification mechanism is + * invoked by the export request which checks its dst table's trie to see + * whether the change is relevant, and if so, an asynchronous re-validation of * flowspec routes in the dst table is scheduled. That is also done by function * rt_next_hop_update(), like nexthop recomputation above. It iterates over all * flowspec routes and re-validates them. It also recalculates the trie. @@ -83,9 +82,8 @@ * will be re-validated later in this round anyway. * * The third mechanism is used for RPKI re-validation of IP routes and it is the - * simplest. It is just a list of subscribers in src table, who are notified - * when any change happened, but only after a settle time. Also, in RPKI case - * the dst is not a table, but a channel, who refeeds routes through a filter. + * simplest. It is also an auxiliary export request belonging to the + * appropriate channel, triggering its reload/refeed timer after a settle time. */ #undef LOCAL_DEBUG @@ -105,6 +103,7 @@ #include "lib/string.h" #include "lib/alloca.h" #include "lib/flowspec.h" +#include "lib/idm.h" #ifdef CONFIG_BGP #include "proto/bgp/bgp.h" @@ -129,27 +128,24 @@ struct rt_export_block { struct rt_pending_export export[]; }; -static void rt_free_hostcache(rtable *tab); -static void rt_notify_hostcache(rtable *tab, net *net); -static void rt_update_hostcache(rtable *tab); -static void rt_next_hop_update(rtable *tab); +static void rt_free_hostcache(struct rtable_private *tab); +static void rt_update_hostcache(void *tab); +static void rt_next_hop_update(void *tab); static inline void rt_next_hop_resolve_rte(rte *r); static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); -static inline void rt_prune_table(rtable *tab); -static inline void rt_schedule_notify(rtable *tab); -static void rt_flowspec_notify(rtable *tab, net *net); -static void rt_kick_prune_timer(rtable *tab); +static inline void rt_prune_table(struct rtable_private *tab); +static void rt_kick_prune_timer(struct rtable_private *tab); static void rt_feed_by_fib(void *); static void rt_feed_by_trie(void *); static void rt_feed_equal(void *); static void rt_feed_for(void *); -static uint rt_feed_net(struct rt_export_hook *c, net *n); -static void rt_check_cork_low(rtable *tab); -static void rt_check_cork_high(rtable *tab); +static void rt_check_cork_low(struct rtable_private *tab); +static void rt_check_cork_high(struct rtable_private *tab); static void rt_cork_release_hook(void *); +static void rt_delete(void *); -static inline void rt_export_used(struct rt_exporter *); -static void rt_export_cleanup(rtable *tab); +static void rt_export_used(struct rt_table_exporter *); +static void rt_export_cleanup(struct rtable_private *tab); static int rte_same(rte *x, rte *y); @@ -185,13 +181,18 @@ const char *rt_export_state_name(u8 state) return rt_export_state_name_array[state]; } -static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old); -static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); +static struct hostentry *rt_get_hostentry(struct rtable_private *tab, ip_addr a, ip_addr ll, rtable *dep); + +#define rt_trace(tab, level, fmt, args...) do {\ + struct rtable_private *t = (tab); \ + if (t->config->debug & (level)) \ + log(L_TRACE "%s: " fmt, t->name, ##args); \ +} while (0) static void net_init_with_trie(struct fib *f, void *N) { - rtable *tab = SKIP_BACK(rtable, fib, f); + struct rtable_private *tab = SKIP_BACK(struct rtable_private, fib, f); net *n = N; if (tab->trie) @@ -202,7 +203,7 @@ net_init_with_trie(struct fib *f, void *N) } static inline net * -net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0) +net_route_ip4_trie(struct rtable_private *t, const net_addr_ip4 *n0) { TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n) { @@ -216,7 +217,7 @@ net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0) } static inline net * -net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0) +net_route_vpn4_trie(struct rtable_private *t, const net_addr_vpn4 *n0) { TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px) { @@ -232,7 +233,7 @@ net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0) } static inline net * -net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0) +net_route_ip6_trie(struct rtable_private *t, const net_addr_ip6 *n0) { TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n) { @@ -246,7 +247,7 @@ net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0) } static inline net * -net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0) +net_route_vpn6_trie(struct rtable_private *t, const net_addr_vpn6 *n0) { TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) { @@ -262,7 +263,7 @@ net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0) } static inline void * -net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0) +net_route_ip6_sadr_trie(struct rtable_private *t, const net_addr_ip6_sadr *n0) { TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) { @@ -295,7 +296,7 @@ net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0) } static inline net * -net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0) +net_route_ip4_fib(struct rtable_private *t, const net_addr_ip4 *n0) { net_addr_ip4 n; net_copy_ip4(&n, n0); @@ -311,7 +312,7 @@ net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0) } static inline net * -net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0) +net_route_vpn4_fib(struct rtable_private *t, const net_addr_vpn4 *n0) { net_addr_vpn4 n; net_copy_vpn4(&n, n0); @@ -327,7 +328,7 @@ net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0) } static inline net * -net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0) +net_route_ip6_fib(struct rtable_private *t, const net_addr_ip6 *n0) { net_addr_ip6 n; net_copy_ip6(&n, n0); @@ -343,7 +344,7 @@ net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0) } static inline net * -net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0) +net_route_vpn6_fib(struct rtable_private *t, const net_addr_vpn6 *n0) { net_addr_vpn6 n; net_copy_vpn6(&n, n0); @@ -359,7 +360,7 @@ net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0) } static inline void * -net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0) +net_route_ip6_sadr_fib(struct rtable_private *t, const net_addr_ip6_sadr *n0) { net_addr_ip6_sadr n; net_copy_ip6_sadr(&n, n0); @@ -399,7 +400,7 @@ net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0) } net * -net_route(rtable *tab, const net_addr *n) +net_route(struct rtable_private *tab, const net_addr *n) { ASSERT(tab->addr_type == n->type); @@ -442,7 +443,7 @@ net_route(rtable *tab, const net_addr *n) static int -net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) +net_roa_check_ip4_trie(struct rtable_private *tab, const net_addr_ip4 *px, u32 asn) { int anything = 0; @@ -470,7 +471,7 @@ net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) } static int -net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) +net_roa_check_ip4_fib(struct rtable_private *tab, const net_addr_ip4 *px, u32 asn) { struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -502,7 +503,7 @@ net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) } static int -net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) +net_roa_check_ip6_trie(struct rtable_private *tab, const net_addr_ip6 *px, u32 asn) { int anything = 0; @@ -530,7 +531,7 @@ net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) } static int -net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) +net_roa_check_ip6_fib(struct rtable_private *tab, const net_addr_ip6 *px, u32 asn) { struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -577,24 +578,30 @@ net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) * must have type NET_IP4 or NET_IP6, respectively. */ int -net_roa_check(rtable *tab, const net_addr *n, u32 asn) +net_roa_check(rtable *tp, const net_addr *n, u32 asn) { - if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4)) - { - if (tab->trie) - return net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn); - else - return net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn); - } - else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6)) + int out = ROA_UNKNOWN; + + RT_LOCKED(tp, tab) { - if (tab->trie) - return net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn); + if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4)) + { + if (tab->trie) + out = net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn); + else + out = net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn); + } + else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6)) + { + if (tab->trie) + out = net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn); + else + out = net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn); + } else - return net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn); + out = ROA_UNKNOWN; /* Should not happen */ } - else - return ROA_UNKNOWN; /* Should not happen */ + return out; } /** @@ -618,7 +625,7 @@ rte_find(net *net, struct rte_src *src) struct rte_storage * -rte_store(const rte *r, net *net, rtable *tab) +rte_store(const rte *r, net *net, struct rtable_private *tab) { struct rte_storage *e = sl_alloc(tab->rte_slab); @@ -902,7 +909,7 @@ channel_rpe_mark_seen(struct rt_export_request *req, struct rt_pending_export *r } void -rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, +rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first, struct rte **feed, uint count) { struct channel *c = SKIP_BACK(struct channel, out_req, req); @@ -946,7 +953,7 @@ rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_p done: /* Check obsolete routes for previously exported */ - while (rpe) + RPE_WALK(first, rpe, NULL) { channel_rpe_mark_seen(req, rpe); if (rpe->old) @@ -957,7 +964,6 @@ done: old_best = &rpe->old->rte; } } - rpe = rpe_next(rpe, NULL); } /* Nothing to export */ @@ -1030,7 +1036,7 @@ rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool } void -rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, +rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first, struct rte **feed, uint count) { struct channel *c = SKIP_BACK(struct channel, out_req, req); @@ -1056,7 +1062,7 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen } /* Check obsolete routes for previously exported */ - while (rpe) + RPE_WALK(first, rpe, NULL) { channel_rpe_mark_seen(req, rpe); if (rpe->old) @@ -1067,7 +1073,6 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen old_best = &rpe->old->rte; } } - rpe = rpe_next(rpe, NULL); } /* Prepare new merged route */ @@ -1078,17 +1083,16 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen } void -rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first) { struct channel *c = SKIP_BACK(struct channel, out_req, req); - rte *o = RTE_VALID_OR_NULL(rpe->old_best); - struct rte_storage *new_best = rpe->new_best; + rte *o = RTE_VALID_OR_NULL(first->old_best); + struct rte_storage *new_best = first->new_best; - while (rpe) + RPE_WALK(first, rpe, NULL) { channel_rpe_mark_seen(req, rpe); new_best = rpe->new_best; - rpe = rpe_next(rpe, NULL); } rte n0 = RTE_COPY_VALID(new_best); @@ -1097,27 +1101,26 @@ rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_ } void -rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first) { struct channel *c = SKIP_BACK(struct channel, out_req, req); - rte *n = RTE_VALID_OR_NULL(rpe->new); - rte *o = RTE_VALID_OR_NULL(rpe->old); + rte *n = RTE_VALID_OR_NULL(first->new); + rte *o = RTE_VALID_OR_NULL(first->old); if (!n && !o) { - channel_rpe_mark_seen(req, rpe); + channel_rpe_mark_seen(req, first); return; } struct rte_src *src = n ? n->src : o->src; - struct rte_storage *new_latest = rpe->new; + struct rte_storage *new_latest = first->new; - while (rpe) + RPE_WALK(first, rpe, src) { channel_rpe_mark_seen(req, rpe); new_latest = rpe->new; - rpe = rpe_next(rpe, src); } rte n0 = RTE_COPY_VALID(new_latest); @@ -1165,9 +1168,11 @@ rpe_next(struct rt_pending_export *rpe, struct rte_src *src) } static struct rt_pending_export * rt_next_export_fast(struct rt_pending_export *last); -static void -rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) +static int +rte_export(struct rt_table_export_hook *th, struct rt_pending_export *rpe) { + rtable *tab = RT_PUB(SKIP_BACK(struct rtable_private, exporter, th->table)); + struct rt_export_hook *hook = &th->h; if (bmap_test(&hook->seq_map, rpe->seq)) goto ignore; /* Seen already */ @@ -1205,6 +1210,7 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) else if (hook->req->export_bulk) { net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + RT_LOCK(tab); uint count = rte_feed_count(net); rte **feed = NULL; if (count) @@ -1212,6 +1218,7 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) feed = alloca(count * sizeof(rte *)); rte_feed_obtain(net, feed, count); } + RT_UNLOCK(tab); hook->req->export_bulk(hook->req, n, rpe, feed, count); } else @@ -1219,15 +1226,16 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) ignore: /* Get the next export if exists */ - hook->rpe_next = rt_next_export_fast(rpe); + th->rpe_next = rt_next_export_fast(rpe); /* The last block may be available to free */ - if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe)) - CALL(hook->table->used, hook->table); + int used = (PAGE_HEAD(th->rpe_next) != PAGE_HEAD(rpe)); /* Releasing this export for cleanup routine */ DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq); - atomic_store_explicit(&hook->last_export, rpe, memory_order_release); + atomic_store_explicit(&th->last_export, rpe, memory_order_release); + + return used; } /** @@ -1262,7 +1270,7 @@ ignore: * done outside of scope of rte_announce(). */ static void -rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, +rte_announce(struct rtable_private *tab, net *net, struct rte_storage *new, struct rte_storage *old, struct rte_storage *new_best, struct rte_storage *old_best) { int new_best_valid = rte_is_valid(RTE_OR_NULL(new_best)); @@ -1271,23 +1279,12 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage if ((new == old) && (new_best == old_best)) return; - if (new_best_valid || old_best_valid) - { - if (new_best_valid) - new_best->rte.sender->stats.pref++; - if (old_best_valid) - old_best->rte.sender->stats.pref--; - - if (tab->hostcache) - rt_notify_hostcache(tab, net); + if (new_best_valid) + new_best->rte.sender->stats.pref++; + if (old_best_valid) + old_best->rte.sender->stats.pref--; - if (!EMPTY_LIST(tab->flowspec_links)) - rt_flowspec_notify(tab, net); - } - - rt_schedule_notify(tab); - - if (EMPTY_LIST(tab->exporter.hooks) && EMPTY_LIST(tab->exporter.pending)) + if (EMPTY_LIST(tab->exporter.e.hooks) && EMPTY_LIST(tab->exporter.pending)) { /* No export hook and no pending exports to cleanup. We may free the route immediately. */ if (!old) @@ -1356,7 +1353,7 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage &net->last->next, &rpenull, rpe, memory_order_relaxed, memory_order_relaxed)); - + } net->last = rpe; @@ -1368,9 +1365,6 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage tab->exporter.first = rpe; rt_check_cork_high(tab); - - if (!tm_active(tab->exporter.export_timer)) - tm_start(tab->exporter.export_timer, tab->config->export_settle_time); } static struct rt_pending_export * @@ -1399,8 +1393,10 @@ rt_next_export_fast(struct rt_pending_export *last) } static struct rt_pending_export * -rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab) +rt_next_export(struct rt_table_export_hook *hook, struct rt_table_exporter *tab) { + ASSERT_DIE(RT_IS_LOCKED(SKIP_BACK(struct rtable_private, exporter, tab))); + /* As the table is locked, it is safe to reload the last export pointer */ struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire); @@ -1416,26 +1412,57 @@ rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab) static inline void rt_send_export_event(struct rt_export_hook *hook) { - ev_send(hook->req->list, hook->event); + ev_send(hook->req->list, &hook->event); } static void rt_announce_exports(timer *tm) { - rtable *tab = tm->data; + RT_LOCKED((rtable *) tm->data, tab) + if (!EMPTY_LIST(tab->exporter.pending)) + { + struct rt_export_hook *c; node *n; + WALK_LIST2(c, n, tab->exporter.e.hooks, n) + { + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) + continue; - struct rt_export_hook *c; node *n; - WALK_LIST2(c, n, tab->exporter.hooks, n) + rt_send_export_event(c); + } + } +} + +static void +rt_import_announce_exports(void *_hook) +{ + struct rt_import_hook *hook = _hook; + RT_LOCKED(hook->table, tab) { - if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) - continue; + if (hook->import_state == TIS_CLEARED) + { + void (*stopped)(struct rt_import_request *) = hook->stopped; + struct rt_import_request *req = hook->req; + req->hook = NULL; + + rt_trace(tab, D_EVENTS, "Hook %s stopped", req->name); + rem_node(&hook->n); + mb_free(hook); + rt_unlock_table(tab); + RT_UNLOCK(tab); - rt_send_export_event(c); + stopped(req); + return; + } + + rt_trace(tab, D_EVENTS, "Announcing exports after imports from %s", hook->req->name); + + if (!tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); } } static struct rt_pending_export * -rt_last_export(struct rt_exporter *tab) +rt_last_export(struct rt_table_exporter *tab) { struct rt_pending_export *rpe = NULL; @@ -1455,31 +1482,42 @@ rt_last_export(struct rt_exporter *tab) static void rt_export_hook(void *_data) { - struct rt_export_hook *c = _data; + struct rt_table_export_hook *c = _data; + rtable *tab = SKIP_BACK(rtable, priv.exporter, c->table); - ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_READY); + ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_READY); if (!c->rpe_next) { + RT_LOCK(tab); c->rpe_next = rt_next_export(c, c->table); if (!c->rpe_next) { - CALL(c->table->used, c->table); + rt_export_used(c->table); + RT_UNLOCK(tab); return; } + + RT_UNLOCK(tab); } + int used = 0; + /* Process the export */ for (uint i=0; i<RT_EXPORT_BULK; i++) { - rte_export(c, c->rpe_next); + used += rte_export(c, c->rpe_next); if (!c->rpe_next) break; } - rt_send_export_event(c); + if (used) + RT_LOCKED(tab, _) + rt_export_used(c->table); + + rt_send_export_event(&c->h); } @@ -1549,11 +1587,10 @@ rte_same(rte *x, rte *y) static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } -static void -rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) +static int +rte_recalculate(struct rtable_private *table, struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) { struct rt_import_request *req = c->req; - struct rtable *table = c->table; struct rt_import_stats *stats = &c->stats; struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; @@ -1602,7 +1639,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr /* We need to free the already stored route here before returning */ rte_free(new_stored); - return; + return 0; } *before_old = (*before_old)->next; @@ -1612,7 +1649,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr if (!old && !new) { stats->withdraws_ignored++; - return; + return 0; } /* If rejected by import limit, we need to pretend there is no route */ @@ -1748,18 +1785,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr rte_announce(table, net, new_stored, old_stored, net->routes, old_best_stored); - if (!net->routes && - (table->gc_counter++ >= table->config->gc_threshold)) - rt_kick_prune_timer(table); - -#if 0 - /* Enable and reimplement these callbacks if anybody wants to use them */ - if (old_ok && p->rte_remove) - p->rte_remove(net, old); - if (new_ok && p->rte_insert) - p->rte_insert(net, &new_stored->rte); -#endif - + return 1; } int @@ -1870,39 +1896,47 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt if (!hook) return; - net *nn; - if (new) + RT_LOCKED(hook->table, tab) + { + net *nn; + if (new) { /* Use the actual struct network, not the dummy one */ - nn = net_get(hook->table, n); + nn = net_get(tab, n); new->net = nn->n.addr; new->sender = hook; /* Set the stale cycle */ new->stale_cycle = hook->stale_set; } - else if (!(nn = net_find(hook->table, n))) + else if (!(nn = net_find(tab, n))) { req->hook->stats.withdraws_ignored++; - return; + RT_RETURN(tab); } - /* And recalculate the best route */ - rte_recalculate(hook, nn, new, src); + /* Recalculate the best route */ + if (rte_recalculate(tab, hook, nn, new, src)) + ev_send(req->list, &hook->announce_event); + } } /* Check rtable for best route to given net whether it would be exported do p */ int -rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) +rt_examine(rtable *tp, net_addr *a, struct channel *c, const struct filter *filter) { - net *n = net_find(t, a); + rte rt = {}; - if (!n || !rte_is_valid(RTE_OR_NULL(n->routes))) - return 0; + RT_LOCKED(tp, t) + { + net *n = net_find(t, a); + if (n) + rt = RTE_COPY_VALID(n->routes); + } - rte rt = n->routes->rte; + if (!rt.src) + return 0; - /* Rest is stripped down export_filter() */ int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0; if (v == RIC_PROCESS) v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT); @@ -1911,34 +1945,41 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte } static void -rt_table_export_done(struct rt_export_hook *hook) +rt_table_export_done(void *hh) { - struct rt_exporter *re = hook->table; - struct rtable *tab = SKIP_BACK(struct rtable, exporter, re); + struct rt_table_export_hook *hook = hh; + struct rt_export_request *req = hook->h.req; + void (*stopped)(struct rt_export_request *) = hook->h.stopped; + rtable *t = SKIP_BACK(rtable, priv.exporter, hook->table); - rt_unlock_table(tab); - DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); + RT_LOCKED(t, tab) + { + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); + + /* Drop pending exports */ + rt_export_used(&tab->exporter); + + /* Do the common code; this frees the hook */ + rt_export_stopped(&hook->h); + } + + /* Report the channel as stopped. */ + CALL(stopped, req); + + /* Unlock the table; this may free it */ + rt_unlock_table(t); } -static void -rt_export_stopped(void *data) +void +rt_export_stopped(struct rt_export_hook *hook) { - struct rt_export_hook *hook = data; - struct rt_exporter *tab = hook->table; - - /* Drop pending exports */ - CALL(tab->used, tab); + /* Unlink from the request */ + hook->req->hook = NULL; /* Unlist */ rem_node(&hook->n); - /* Report the channel as stopped. */ - hook->stopped(hook->req); - - /* Reporting the hook as finished. */ - CALL(tab->done, hook); - - /* Free the hook. */ + /* Free the hook itself together with its pool */ rfree(hook->pool); } @@ -1948,8 +1989,7 @@ rt_set_import_state(struct rt_import_hook *hook, u8 state) hook->last_state_change = current_time(); hook->import_state = state; - if (hook->req->log_state_change) - hook->req->log_state_change(hook->req, state); + CALL(hook->req->log_state_change, hook->req, state); } void @@ -1958,26 +1998,28 @@ rt_set_export_state(struct rt_export_hook *hook, u8 state) hook->last_state_change = current_time(); atomic_store_explicit(&hook->export_state, state, memory_order_release); - if (hook->req->log_state_change) - hook->req->log_state_change(hook->req, state); + CALL(hook->req->log_state_change, hook->req, state); } void -rt_request_import(rtable *tab, struct rt_import_request *req) +rt_request_import(rtable *t, struct rt_import_request *req) { - rt_lock_table(tab); + RT_LOCKED(t, tab) + { + rt_lock_table(tab); - struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); + struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); - DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); + hook->announce_event = (event) { .hook = rt_import_announce_exports, .data = hook }; - hook->req = req; - hook->table = tab; + DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); - rt_set_import_state(hook, TIS_UP); + hook->req = req; + hook->table = t; - hook->n = (node) {}; - add_tail(&tab->imports, &hook->n); + rt_set_import_state(hook, TIS_UP); + add_tail(&tab->imports, &hook->n); + } } void @@ -1986,22 +2028,24 @@ rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_r ASSERT_DIE(req->hook); struct rt_import_hook *hook = req->hook; - rt_schedule_prune(hook->table); - - rt_set_import_state(hook, TIS_STOP); - - hook->stopped = stopped; + RT_LOCKED(hook->table, tab) + { + rt_schedule_prune(tab); + rt_set_import_state(hook, TIS_STOP); + hook->stopped = stopped; + } } -static struct rt_export_hook * -rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) +static void +rt_table_export_start_locked(struct rtable_private *tab, struct rt_export_request *req) { - rtable *tab = SKIP_BACK(rtable, exporter, re); + struct rt_exporter *re = &tab->exporter.e; rt_lock_table(tab); - pool *p = rp_new(tab->rp, "Export hook"); - struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); - hook->pool = p; + req->hook = rt_alloc_export(re, sizeof(struct rt_table_export_hook)); + req->hook->req = req; + + struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, req->hook); /* stats zeroed by mb_allocz */ switch (req->addr_mode) @@ -2009,24 +2053,25 @@ rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) case TE_ADDR_IN: if (tab->trie && net_val_match(tab->addr_type, NB_IP)) { - hook->walk_state = mb_allocz(p, sizeof (struct f_trie_walk_state)); + hook->walk_state = mb_allocz(hook->h.pool, sizeof (struct f_trie_walk_state)); hook->walk_lock = rt_lock_trie(tab); trie_walk_init(hook->walk_state, tab->trie, req->addr); - hook->event = ev_new_init(p, rt_feed_by_trie, hook); + hook->h.event.hook = rt_feed_by_trie; + hook->walk_last.type = 0; break; } /* fall through */ case TE_ADDR_NONE: FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); - hook->event = ev_new_init(p, rt_feed_by_fib, hook); + hook->h.event.hook = rt_feed_by_fib; break; case TE_ADDR_EQUAL: - hook->event = ev_new_init(p, rt_feed_equal, hook); + hook->h.event.hook = rt_feed_equal; break; case TE_ADDR_FOR: - hook->event = ev_new_init(p, rt_feed_for, hook); + hook->h.event.hook = rt_feed_for; break; default: @@ -2035,22 +2080,50 @@ rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); - return hook; + struct rt_pending_export *rpe = rt_last_export(hook->table); + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0); + atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); + + rt_init_export(re, req->hook); +} + +static void +rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) +{ + RT_LOCKED(SKIP_BACK(rtable, priv.exporter.e, re), tab) + rt_table_export_start_locked(tab, req); +} + +void rt_request_export(rtable *t, struct rt_export_request *req) +{ + RT_LOCKED(t, tab) + rt_table_export_start_locked(tab, req); /* Is locked inside */ } void -rt_request_export(struct rt_exporter *re, struct rt_export_request *req) +rt_request_export_other(struct rt_exporter *re, struct rt_export_request *req) +{ + return re->class->start(re, req); +} + +struct rt_export_hook * +rt_alloc_export(struct rt_exporter *re, uint size) { - struct rt_export_hook *hook = req->hook = re->start(re, req); + pool *p = rp_new(re->rp, "Export hook"); + struct rt_export_hook *hook = mb_allocz(p, size); - hook->req = req; + hook->pool = p; hook->table = re; - bmap_init(&hook->seq_map, hook->pool, 1024); + return hook; +} - struct rt_pending_export *rpe = rt_last_export(hook->table); - DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0); - atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); +void +rt_init_export(struct rt_exporter *re, struct rt_export_hook *hook) +{ + hook->event.data = hook; + + bmap_init(&hook->seq_map, hook->pool, 1024); hook->n = (node) {}; add_tail(&re->hooks, &hook->n); @@ -2061,45 +2134,57 @@ rt_request_export(struct rt_exporter *re, struct rt_export_request *req) } static void -rt_table_export_stop(struct rt_export_hook *hook) +rt_table_export_stop_locked(struct rt_export_hook *hh) { - rtable *tab = SKIP_BACK(rtable, exporter, hook->table); + struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, hh); + struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter, hook->table); - if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) != TES_FEEDING) - return; - - switch (hook->req->addr_mode) - { - case TE_ADDR_IN: - if (hook->walk_lock) - { - rt_unlock_trie(tab, hook->walk_lock); - hook->walk_lock = NULL; - mb_free(hook->walk_state); - hook->walk_state = NULL; + if (atomic_load_explicit(&hh->export_state, memory_order_relaxed) == TES_FEEDING) + switch (hh->req->addr_mode) + { + case TE_ADDR_IN: + if (hook->walk_lock) + { + rt_unlock_trie(tab, hook->walk_lock); + hook->walk_lock = NULL; + mb_free(hook->walk_state); + hook->walk_state = NULL; + break; + } + /* fall through */ + case TE_ADDR_NONE: + fit_get(&tab->fib, &hook->feed_fit); break; - } - /* fall through */ - case TE_ADDR_NONE: - fit_get(&tab->fib, &hook->feed_fit); - break; - } + } +} + +static void +rt_table_export_stop(struct rt_export_hook *hh) +{ + struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, hh); + rtable *t = SKIP_BACK(rtable, priv.exporter, hook->table); + if (RT_IS_LOCKED(t)) + rt_table_export_stop_locked(hh); + else + RT_LOCKED(t, tab) + rt_table_export_stop_locked(hh); } void rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *)) { + ASSERT_DIE(birdloop_inside(req->list->loop)); ASSERT_DIE(req->hook); struct rt_export_hook *hook = req->hook; /* Cancel the feeder event */ - ev_postpone(hook->event); + ev_postpone(&hook->event); /* Stop feeding from the exporter */ - CALL(hook->table->stop, hook); + CALL(hook->table->class->stop, hook); /* Reset the event as the stopped event */ - hook->event->hook = rt_export_stopped; + hook->event.hook = hook->table->class->done; hook->stopped = stopped; /* Update export state */ @@ -2130,12 +2215,15 @@ rt_refresh_begin(struct rt_import_request *req) ASSERT_DIE(hook); ASSERT_DIE(hook->stale_set == hook->stale_valid); + RT_LOCKED(hook->table, tab) + { + /* If the pruning routine is too slow */ if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid) || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128)) { log(L_WARN "Route refresh flood in table %s", hook->table->name); - FIB_WALK(&hook->table->fib, net, n) + FIB_WALK(&tab->fib, net, n) { for (struct rte_storage *e = n->routes; e; e = e->next) if (e->rte.sender == req->hook) @@ -2156,6 +2244,8 @@ rt_refresh_begin(struct rt_import_request *req) if (req->trace_routes & D_STATES) log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set); + + } } /** @@ -2172,13 +2262,16 @@ rt_refresh_end(struct rt_import_request *req) struct rt_import_hook *hook = req->hook; ASSERT_DIE(hook); - hook->stale_valid++; - ASSERT_DIE(hook->stale_set == hook->stale_valid); + RT_LOCKED(hook->table, tab) + { + hook->stale_valid++; + ASSERT_DIE(hook->stale_set == hook->stale_valid); - rt_schedule_prune(hook->table); + rt_schedule_prune(tab); - if (req->trace_routes & D_STATES) - log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); + } } /** @@ -2203,8 +2296,11 @@ rte_dump(struct rte_storage *e) * This function dumps contents of a given routing table to debug output. */ void -rt_dump(rtable *t) +rt_dump(rtable *tp) { + RT_LOCKED(tp, t) + { + debug("Dump of routing table <%s>%s\n", t->name, t->deleted ? " (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); @@ -2216,6 +2312,8 @@ rt_dump(rtable *t) } FIB_WALK_END; debug("\n"); + + } } /** @@ -2237,11 +2335,14 @@ rt_dump_all(void) } void -rt_dump_hooks(rtable *tab) +rt_dump_hooks(rtable *tp) { + RT_LOCKED(tp, tab) + { + debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->deleted ? " (deleted)" : ""); - debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", - tab->nhu_state, tab->hcu_scheduled, tab->use_count, tab->rt_count); + debug(" nhu_state=%u use_count=%d rt_count=%u\n", + tab->nhu_state, tab->use_count, tab->rt_count); debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); @@ -2255,15 +2356,18 @@ rt_dump_hooks(rtable *tab) ih->last_state_change, ih->import_state, ih->stopped); } - struct rt_export_hook *eh; - WALK_LIST(eh, tab->exporter.hooks) + struct rt_table_export_hook *eh; + WALK_LIST(eh, tab->exporter.e.hooks) { - eh->req->dump_req(eh->req); + eh->h.req->dump_req(eh->h.req); debug(" Export hook %p requested by %p:" " refeed_pending=%u last_state_change=%t export_state=%u\n", - eh, eh->req, eh->refeed_pending, eh->last_state_change, atomic_load_explicit(&eh->export_state, memory_order_relaxed)); + eh, eh->h.req, eh->refeed_pending, eh->h.last_state_change, + atomic_load_explicit(&eh->h.export_state, memory_order_relaxed)); } debug("\n"); + + } } void @@ -2282,30 +2386,32 @@ rt_dump_hooks_all(void) } static inline void -rt_schedule_hcu(rtable *tab) +rt_schedule_nhu(struct rtable_private *tab) { - if (tab->hcu_scheduled) - return; - - tab->hcu_scheduled = 1; - ev_schedule(tab->rt_event); -} - -static inline void -rt_schedule_nhu(rtable *tab) -{ - if (tab->nhu_state == NHU_CLEAN) - ev_schedule(tab->rt_event); - - /* state change: - * NHU_CLEAN -> NHU_SCHEDULED - * NHU_RUNNING -> NHU_DIRTY - */ - tab->nhu_state |= NHU_SCHEDULED; + if (tab->nhu_corked) + { + if (!(tab->nhu_corked & NHU_SCHEDULED)) + { + tab->nhu_corked |= NHU_SCHEDULED; + rt_lock_table(tab); + } + } + else if (!(tab->nhu_state & NHU_SCHEDULED)) + { + rt_trace(tab, D_EVENTS, "Scheduling NHU"); + rt_lock_table(tab); + + /* state change: + * NHU_CLEAN -> NHU_SCHEDULED + * NHU_RUNNING -> NHU_DIRTY + */ + if ((tab->nhu_state |= NHU_SCHEDULED) == NHU_SCHEDULED) + ev_schedule(tab->nhu_event); + } } void -rt_schedule_prune(rtable *tab) +rt_schedule_prune(struct rtable_private *tab) { if (tab->prune_state == 0) ev_schedule(tab->rt_event); @@ -2315,12 +2421,12 @@ rt_schedule_prune(rtable *tab) } static void -rt_export_used(struct rt_exporter *e) +rt_export_used(struct rt_table_exporter *e) { - rtable *tab = SKIP_BACK(rtable, exporter, e); + struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter, e); + ASSERT_DIE(RT_IS_LOCKED(tab)); - if (config->table_debug) - log(L_TRACE "%s: Export cleanup requested", tab->name); + rt_trace(tab, D_EVENTS, "Export cleanup requested"); if (tab->export_used) return; @@ -2332,69 +2438,31 @@ rt_export_used(struct rt_exporter *e) static void rt_event(void *ptr) { - rtable *tab = ptr; + RT_LOCKED((rtable *) ptr, tab) + { rt_lock_table(tab); if (tab->export_used) rt_export_cleanup(tab); - if ( - tab->hcu_corked || - tab->nhu_corked || - (tab->hcu_scheduled || tab->nhu_state) && rt_cork_check(tab->uncork_event) - ) - { - if (!tab->hcu_corked && !tab->nhu_corked && config->table_debug) - log(L_TRACE "%s: Auxiliary routines corked", tab->name); - - tab->hcu_corked |= tab->hcu_scheduled; - tab->hcu_scheduled = 0; - - tab->nhu_corked |= tab->nhu_state; - tab->nhu_state = 0; - } - - if (tab->hcu_scheduled) - rt_update_hostcache(tab); - - if (tab->nhu_state) - rt_next_hop_update(tab); - if (tab->prune_state) rt_prune_table(tab); rt_unlock_table(tab); -} - -static void -rt_uncork_event(void *ptr) -{ - rtable *tab = ptr; - - tab->hcu_scheduled |= tab->hcu_corked; - tab->hcu_corked = 0; - - tab->nhu_state |= tab->nhu_corked; - tab->nhu_corked = 0; - - if (config->table_debug) - log(L_TRACE "%s: Auxiliary routines uncorked", tab->name); - - ev_schedule(tab->rt_event); + } } static void rt_prune_timer(timer *t) { - rtable *tab = t->data; - - if (tab->gc_counter >= tab->config->gc_threshold) - rt_schedule_prune(tab); + RT_LOCKED((rtable *) t->data, tab) + if (tab->gc_counter >= tab->config->gc_threshold) + rt_schedule_prune(tab); } static void -rt_kick_prune_timer(rtable *tab) +rt_kick_prune_timer(struct rtable_private *tab) { /* Return if prune is already scheduled */ if (tm_active(tab->prune_timer) || (tab->prune_state & 1)) @@ -2407,153 +2475,132 @@ rt_kick_prune_timer(rtable *tab) } -static inline btime -rt_settled_time(rtable *tab) -{ - ASSUME(tab->base_settle_time != 0); - - return MIN(tab->last_rt_change + tab->config->min_settle_time, - tab->base_settle_time + tab->config->max_settle_time); -} - static void -rt_settle_timer(timer *t) +rt_flowspec_export_one(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first) { - rtable *tab = t->data; - - if (!tab->base_settle_time) - return; + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req); + rtable *dst_pub = ln->dst; + ASSUME(rt_is_flow(dst_pub)); + struct rtable_private *dst = RT_LOCK(dst_pub); - btime settled_time = rt_settled_time(tab); - if (current_time() < settled_time) + /* No need to inspect it further if recalculation is already scheduled */ + if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY) + || !trie_match_net(dst->flowspec_trie, net)) { - tm_set(tab->settle_timer, settled_time); + RT_UNLOCK(dst_pub); + rpe_mark_seen_all(req->hook, first, NULL); return; } - /* Settled */ - tab->base_settle_time = 0; - - struct rt_subscription *s; - WALK_LIST(s, tab->subscribers) - ev_send(s->list, s->event); -} - -static void -rt_kick_settle_timer(rtable *tab) -{ - tab->base_settle_time = current_time(); - - if (!tab->settle_timer) - tab->settle_timer = tm_new_init(tab->rp, rt_settle_timer, tab, 0, 0); - - if (!tm_active(tab->settle_timer)) - tm_set(tab->settle_timer, rt_settled_time(tab)); -} - -static inline void -rt_schedule_notify(rtable *tab) -{ - if (EMPTY_LIST(tab->subscribers)) - return; + /* This net may affect some flowspecs, check the actual change */ + rte *o = RTE_VALID_OR_NULL(first->old_best); + struct rte_storage *new_best = first->new_best; - if (tab->base_settle_time) - return; + RPE_WALK(first, rpe, NULL) + { + rpe_mark_seen(req->hook, rpe); + new_best = rpe->new_best; + } - rt_kick_settle_timer(tab); -} + /* Yes, something has actually changed. Schedule the update. */ + if (o != RTE_VALID_OR_NULL(new_best)) + rt_schedule_nhu(dst); -void -rt_subscribe(rtable *tab, struct rt_subscription *s) -{ - s->tab = tab; - rt_lock_table(tab); - DBG("rt_subscribe(%s)\n", tab->name); - add_tail(&tab->subscribers, &s->n); + RT_UNLOCK(dst_pub); } -void -rt_unsubscribe(struct rt_subscription *s) +static void +rt_flowspec_dump_req(struct rt_export_request *req) { - rem_node(&s->n); - rt_unlock_table(s->tab); + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req); + debug(" Flowspec link for table %s (%p)\n", ln->dst->name, req); } static struct rt_flowspec_link * -rt_flowspec_find_link(rtable *src, rtable *dst) +rt_flowspec_find_link(struct rtable_private *src, rtable *dst) { - struct rt_flowspec_link *ln; - WALK_LIST(ln, src->flowspec_links) - if ((ln->src == src) && (ln->dst == dst)) - return ln; + struct rt_table_export_hook *hook; node *n; + WALK_LIST2(hook, n, src->exporter.e.hooks, h.n) + switch (atomic_load_explicit(&hook->h.export_state, memory_order_acquire)) + { + case TES_FEEDING: + case TES_READY: + if (hook->h.req->export_one == rt_flowspec_export_one) + { + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, hook->h.req); + if (ln->dst == dst) + return ln; + } + } return NULL; } void -rt_flowspec_link(rtable *src, rtable *dst) +rt_flowspec_link(rtable *src_pub, rtable *dst_pub) { - ASSERT(rt_is_ip(src)); - ASSERT(rt_is_flow(dst)); + ASSERT(rt_is_ip(src_pub)); + ASSERT(rt_is_flow(dst_pub)); - struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); + int lock_dst = 0; - if (!ln) + RT_LOCKED(src_pub, src) { - rt_lock_table(src); - rt_lock_table(dst); + struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst_pub); - ln = mb_allocz(src->rp, sizeof(struct rt_flowspec_link)); - ln->src = src; - ln->dst = dst; - add_tail(&src->flowspec_links, &ln->n); + if (!ln) + { + pool *p = src->rp; + ln = mb_allocz(p, sizeof(struct rt_flowspec_link)); + ln->src = src_pub; + ln->dst = dst_pub; + ln->req = (struct rt_export_request) { + .name = mb_sprintf(p, "%s.flowspec.notifier", dst_pub->name), + .list = &global_work_list, + .trace_routes = src->config->debug, + .dump_req = rt_flowspec_dump_req, + .export_one = rt_flowspec_export_one, + }; + + rt_table_export_start_locked(src, &ln->req); + + lock_dst = 1; + } + + ln->uc++; } - ln->uc++; + if (lock_dst) + rt_lock_table(dst_pub); } -void -rt_flowspec_unlink(rtable *src, rtable *dst) +static void +rt_flowspec_link_stopped(struct rt_export_request *req) { - struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); - - ASSERT(ln && (ln->uc > 0)); - - ln->uc--; + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req); + rtable *dst = ln->dst; - if (!ln->uc) - { - rem_node(&ln->n); - mb_free(ln); - - rt_unlock_table(src); - rt_unlock_table(dst); - } + mb_free(ln); + rt_unlock_table(dst); } -static void -rt_flowspec_notify(rtable *src, net *net) +void +rt_flowspec_unlink(rtable *src, rtable *dst) { - /* Only IP tables are src links */ - ASSERT(rt_is_ip(src)); - struct rt_flowspec_link *ln; - WALK_LIST(ln, src->flowspec_links) + RT_LOCKED(src, t) { - rtable *dst = ln->dst; - ASSERT(rt_is_flow(dst)); + ln = rt_flowspec_find_link(t, dst); - /* No need to inspect it further if recalculation is already active */ - if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY)) - continue; + ASSERT(ln && (ln->uc > 0)); - if (trie_match_net(dst->flowspec_trie, net->n.addr)) - rt_schedule_nhu(dst); + if (!--ln->uc) + rt_stop_export(&ln->req, rt_flowspec_link_stopped); } } static void -rt_flowspec_reset_trie(rtable *tab) +rt_flowspec_reset_trie(struct rtable_private *tab) { linpool *lp = tab->flowspec_trie->lp; int ipv4 = tab->flowspec_trie->ipv4; @@ -2566,7 +2613,9 @@ rt_flowspec_reset_trie(rtable *tab) static void rt_free(resource *_r) { - rtable *r = (rtable *) _r; + struct rtable_private *r = SKIP_BACK(struct rtable_private, r, _r); + + DOMAIN_FREE(rtable, r->lock); DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); @@ -2581,7 +2630,6 @@ rt_free(resource *_r) fib_free(&r->fib); hmap_free(&r->id_map); rfree(r->rt_event); - rfree(r->settle_timer); mb_free(r); */ } @@ -2589,26 +2637,42 @@ rt_free(resource *_r) static void rt_res_dump(resource *_r) { - rtable *r = (rtable *) _r; + struct rtable_private *r = SKIP_BACK(struct rtable_private, r, _r); + debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n", r->name, net_label[r->addr_type], r->rt_count, r->use_count); } static struct resclass rt_class = { .name = "Routing table", - .size = sizeof(struct rtable), + .size = sizeof(rtable), .free = rt_free, .dump = rt_res_dump, .lookup = NULL, .memsize = NULL, }; +static const struct rt_exporter_class rt_table_exporter_class = { + .start = rt_table_export_start, + .stop = rt_table_export_stop, + .done = rt_table_export_done, +}; + +void +rt_exporter_init(struct rt_exporter *e) +{ + init_list(&e->hooks); +} + +static struct idm rtable_idm; +uint rtable_max_id = 0; + rtable * rt_setup(pool *pp, struct rtable_config *cf) { pool *p = rp_newf(pp, "Routing table %s", cf->name); - rtable *t = ralloc(p, &rt_class); + struct rtable_private *t = ralloc(p, &rt_class); t->rp = p; t->rte_slab = sl_new(p, sizeof(struct rte_storage)); @@ -2616,6 +2680,11 @@ rt_setup(pool *pp, struct rtable_config *cf) t->name = cf->name; t->config = cf; t->addr_type = cf->addr_type; + t->id = idm_alloc(&rtable_idm); + if (t->id >= rtable_max_id) + rtable_max_id = t->id + 1; + + t->lock = DOMAIN_NEW(rtable, t->name); fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); @@ -2627,44 +2696,41 @@ rt_setup(pool *pp, struct rtable_config *cf) t->fib.init = net_init_with_trie; } - init_list(&t->flowspec_links); - - t->exporter = (struct rt_exporter) { - .addr_type = t->addr_type, - .start = rt_table_export_start, - .stop = rt_table_export_stop, - .done = rt_table_export_done, - .used = rt_export_used, - }; - - init_list(&t->exporter.hooks); - init_list(&t->exporter.pending); - init_list(&t->imports); hmap_init(&t->id_map, p, 1024); hmap_set(&t->id_map, 0); - init_list(&t->subscribers); - t->rt_event = ev_new_init(p, rt_event, t); - t->uncork_event = ev_new_init(p, rt_uncork_event, t); + t->nhu_event = ev_new_init(p, rt_next_hop_update, t); t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); - t->exporter.export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0); t->last_rt_change = t->gc_time = current_time(); - t->exporter.next_seq = 1; + + t->exporter = (struct rt_table_exporter) { + .e = { + .class = &rt_table_exporter_class, + .addr_type = t->addr_type, + .rp = t->rp, + }, + .export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0), + .next_seq = 1, + }; + + rt_exporter_init(&t->exporter.e); + + init_list(&t->exporter.pending); t->cork_threshold = cf->cork_threshold; t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; - if (rt_is_flow(t)) + if (rt_is_flow(RT_PUB(t))) { t->flowspec_trie = f_new_trie(lp_new_default(p), 0); t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); } - return t; + return RT_PUB(t); } /** @@ -2682,6 +2748,7 @@ rt_init(void) init_list(&deleted_routing_tables); ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release"); rt_cork.run = (event) { .hook = rt_cork_release_hook }; + idm_init(&rtable_idm, rt_table_pool, 256); } @@ -2700,7 +2767,7 @@ rt_init(void) * iteration. */ static void -rt_prune_table(rtable *tab) +rt_prune_table(struct rtable_private *tab) { struct fib_iterator *fit = &tab->prune_fit; int limit = 2000; @@ -2708,7 +2775,7 @@ rt_prune_table(rtable *tab) struct rt_import_hook *ih; node *n, *x; - DBG("Pruning route table %s\n", tab->name); + rt_trace(tab, D_STATES, "Pruning"); #ifdef DEBUGGING fib_check(&tab->fib); #endif @@ -2762,7 +2829,7 @@ again: (e->rte.stale_cycle < s->stale_valid) || (e->rte.stale_cycle > s->stale_set)) { - rte_recalculate(e->rte.sender, n, NULL, e->rte.src); + rte_recalculate(tab, e->rte.sender, n, NULL, e->rte.src); limit--; goto rescan; @@ -2784,6 +2851,10 @@ again: } FIB_ITERATE_END; + rt_trace(tab, D_EVENTS, "Prune done, scheduling export timer"); + if (!tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); + #ifdef DEBUGGING fib_check(&tab->fib); #endif @@ -2840,24 +2911,25 @@ again: } /* In some cases, we may want to directly proceed to export cleanup */ - if (EMPTY_LIST(tab->exporter.hooks) && flushed_channels) + if (EMPTY_LIST(tab->exporter.e.hooks) && flushed_channels) rt_export_cleanup(tab); } static void -rt_export_cleanup(rtable *tab) +rt_export_cleanup(struct rtable_private *tab) { tab->export_used = 0; u64 min_seq = ~((u64) 0); struct rt_pending_export *last_export_to_free = NULL; struct rt_pending_export *first = tab->exporter.first; + int want_prune = 0; - struct rt_export_hook *eh; + struct rt_table_export_hook *eh; node *n; - WALK_LIST2(eh, n, tab->exporter.hooks, n) + WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n) { - switch (atomic_load_explicit(&eh->export_state, memory_order_acquire)) + switch (atomic_load_explicit(&eh->h.export_state, memory_order_acquire)) { case TES_DOWN: continue; @@ -2885,16 +2957,14 @@ rt_export_cleanup(rtable *tab) tab->exporter.first = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL; - if (config->table_debug) - log(L_TRACE "%s: Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld", - tab->name, + rt_trace(tab, D_STATES, "Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld", first ? first->seq : 0, tab->exporter.first ? tab->exporter.first->seq : 0, min_seq); - WALK_LIST2(eh, n, tab->exporter.hooks, n) + WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n) { - if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + if (atomic_load_explicit(&eh->h.export_state, memory_order_acquire) != TES_READY) continue; struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); @@ -2920,7 +2990,7 @@ rt_export_cleanup(rtable *tab) net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); ASSERT_DIE(net->first == first); - + if (first == net->last) /* The only export here */ net->last = net->first = NULL; @@ -2928,6 +2998,8 @@ rt_export_cleanup(rtable *tab) /* First is now the next one */ net->first = atomic_load_explicit(&first->next, memory_order_relaxed); + want_prune += !net->routes && !net->first; + /* For now, the old route may be finally freed */ if (first->old) { @@ -2948,7 +3020,7 @@ rt_export_cleanup(rtable *tab) ASSERT_DIE(pos < end); struct rt_pending_export *next = NULL; - + if (++pos < end) next = &reb->export[pos]; else @@ -2963,17 +3035,16 @@ rt_export_cleanup(rtable *tab) if (EMPTY_LIST(tab->exporter.pending)) { - if (config->table_debug) - log(L_TRACE "%s: Resetting export seq", tab->name); + rt_trace(tab, D_EVENTS, "Resetting export seq"); node *n; - WALK_LIST2(eh, n, tab->exporter.hooks, n) + WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n) { - if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + if (atomic_load_explicit(&eh->h.export_state, memory_order_acquire) != TES_READY) continue; ASSERT_DIE(atomic_load_explicit(&eh->last_export, memory_order_acquire) == NULL); - bmap_reset(&eh->seq_map, 1024); + bmap_reset(&eh->h.seq_map, 1024); } tab->exporter.next_seq = 1; @@ -2997,12 +3068,12 @@ done:; if (!first || (first->seq >= ih->flush_seq)) { ih->import_state = TIS_CLEARED; - ih->stopped(ih->req); - rem_node(&ih->n); - mb_free(ih); - rt_unlock_table(tab); + ev_send(ih->req->list, &ih->announce_event); } + if ((tab->gc_counter += want_prune) >= tab->config->gc_threshold) + rt_kick_prune_timer(tab); + if (tab->export_used) ev_schedule(tab->rt_event); @@ -3035,7 +3106,7 @@ rt_cork_release_hook(void *data UNUSED) * */ struct f_trie * -rt_lock_trie(rtable *tab) +rt_lock_trie(struct rtable_private *tab) { ASSERT(tab->trie); @@ -3052,7 +3123,7 @@ rt_lock_trie(rtable *tab) * It may free the trie and schedule next trie pruning. */ void -rt_unlock_trie(rtable *tab, struct f_trie *trie) +rt_unlock_trie(struct rtable_private *tab, struct f_trie *trie) { ASSERT(trie); @@ -3092,8 +3163,8 @@ rt_preconfig(struct config *c) { init_list(&c->tables); - rt_new_table(cf_get_symbol("master4"), NET_IP4); - rt_new_table(cf_get_symbol("master6"), NET_IP6); + c->def_tables[NET_IP4] = cf_define_symbol(cf_get_symbol("master4"), SYM_TABLE, table, NULL); + c->def_tables[NET_IP6] = cf_define_symbol(cf_get_symbol("master6"), SYM_TABLE, table, NULL); } void @@ -3108,6 +3179,13 @@ rt_postconfig(struct config *c) WALK_LIST(rc, c->tables) if (rc->gc_period == (uint) -1) rc->gc_period = (uint) def_gc_period; + + for (uint net_type = 0; net_type < NET_MAX; net_type++) + if (c->def_tables[net_type] && !c->def_tables[net_type]->table) + { + c->def_tables[net_type]->class = SYM_VOID; + c->def_tables[net_type] = NULL; + } } @@ -3117,7 +3195,7 @@ rt_postconfig(struct config *c) */ void -ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) +ea_set_hostentry(ea_list **to, rtable *dep, rtable *src, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) { struct { struct adata ad; @@ -3125,7 +3203,8 @@ ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr g u32 labels[lnum]; } *head = (void *) tmp_alloc_adata(sizeof *head - sizeof(struct adata)); - head->he = rt_get_hostentry(tab, gw, ll, dep); + RT_LOCKED(src, tab) + head->he = rt_get_hostentry(tab, gw, ll, dep); memcpy(head->labels, labels, lnum * sizeof(u32)); ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( @@ -3254,17 +3333,16 @@ rta_next_hop_outdated(ea_list *a) ? head : NULL; } -static inline struct rte_storage * -rt_next_hop_update_rte(rtable *tab, net *n, rte *old) +static inline int +rt_next_hop_update_rte(rte *old, rte *new) { struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); if (!head) - return NULL; - - rte e0 = *old; - rta_apply_hostentry(&e0.attrs, head); + return 0; - return rte_store(&e0, n, tab); + *new = *old; + rta_apply_hostentry(&new->attrs, head); + return 1; } static inline void @@ -3322,7 +3400,6 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list * { ASSERT(rt_is_ip(tab_ip)); ASSERT(rt_is_flow(tab_flow)); - ASSERT(tab_ip->trie); /* RFC 8955 6. a) Flowspec has defined dst prefix */ if (!net_flow_has_dst_prefix(n)) @@ -3342,32 +3419,45 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list * else net_fill_ip6(&dst, net6_prefix(n), net6_pxlen(n)); - /* Find best-match BGP unicast route for flowspec dst prefix */ - net *nb = net_route(tab_ip, &dst); - const rte *rb = nb ? &nb->routes->rte : NULL; + rte rb = {}; + net_addr_union nau; + RT_LOCKED(tab_ip, tip) + { + ASSERT(tip->trie); + /* Find best-match BGP unicast route for flowspec dst prefix */ + net *nb = net_route(tip, &dst); + if (nb) + { + rb = RTE_COPY_VALID(nb->routes); + rta_clone(rb.attrs); + net_copy(&nau.n, nb->n.addr); + rb.net = &nau.n; + } + } /* Register prefix to trie for tracking further changes */ int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; - trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? nb->n.addr->pxlen : 0), max_pxlen); + RT_LOCKED(tab_flow, tfl) + trie_add_prefix(tfl->flowspec_trie, &dst, (rb.net ? rb.net->pxlen : 0), max_pxlen); /* No best-match BGP route -> no flowspec */ - if (!rb || (rt_get_source_attr(rb) != RTS_BGP)) + if (!rb.attrs || (rt_get_source_attr(&rb) != RTS_BGP)) return FLOWSPEC_INVALID; /* Find ORIGINATOR_ID values */ u32 orig_a = ea_get_int(a, "bgp_originator_id", 0); - u32 orig_b = ea_get_int(rb->attrs, "bgp_originator_id", 0); + u32 orig_b = ea_get_int(rb.attrs, "bgp_originator_id", 0); /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */ if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal( ea_get_ip(a, &ea_gen_from, IPA_NONE), - ea_get_ip(rb->attrs, &ea_gen_from, IPA_NONE) + ea_get_ip(rb.attrs, &ea_gen_from, IPA_NONE) ))) return FLOWSPEC_INVALID; /* Find ASN of the best-match route, for use in next checks */ - u32 asn_b = rta_get_first_asn(rb->attrs); + u32 asn_b = rta_get_first_asn(rb.attrs); if (!asn_b) return FLOWSPEC_INVALID; @@ -3376,51 +3466,53 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list * return FLOWSPEC_INVALID; /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */ - TRIE_WALK(tab_ip->trie, subnet, &dst) + RT_LOCKED(tab_ip, tip) { - net *nc = net_find_valid(tab_ip, &subnet); - if (!nc) - continue; + TRIE_WALK(tip->trie, subnet, &dst) + { + net *nc = net_find_valid(tip, &subnet); + if (!nc) + continue; - const rte *rc = &nc->routes->rte; - if (rt_get_source_attr(rc) != RTS_BGP) - return FLOWSPEC_INVALID; + const rte *rc = &nc->routes->rte; + if (rt_get_source_attr(rc) != RTS_BGP) + RT_RETURN(tip, FLOWSPEC_INVALID); - if (rta_get_first_asn(rc->attrs) != asn_b) - return FLOWSPEC_INVALID; + if (rta_get_first_asn(rc->attrs) != asn_b) + RT_RETURN(tip, FLOWSPEC_INVALID); + } + TRIE_WALK_END; } - TRIE_WALK_END; return FLOWSPEC_VALID; } #endif /* CONFIG_BGP */ -static struct rte_storage * -rt_flowspec_update_rte(rtable *tab, net *n, rte *r) +static int +rt_flowspec_update_rte(rtable *tab, rte *r, rte *new) { #ifdef CONFIG_BGP if (r->generation || (rt_get_source_attr(r) != RTS_BGP)) - return NULL; + return 0; struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req); if (!bc->base_table) - return NULL; + return 0; struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); enum flowspec_valid old = rt_get_flowspec_valid(r), - valid = rt_flowspec_check(bc->base_table, tab, n->n.addr, r->attrs, p->is_interior); + valid = rt_flowspec_check(bc->base_table, tab, r->net, r->attrs, p->is_interior); if (old == valid) - return NULL; - - rte new = *r; - ea_set_attr_u32(&new.attrs, &ea_gen_flowspec_valid, 0, valid); + return 0; - return rte_store(&new, n, tab); + *new = *r; + ea_set_attr_u32(&new->attrs, &ea_gen_flowspec_valid, 0, valid); + return 1; #else - return NULL; + return 0; #endif } @@ -3455,10 +3547,9 @@ rt_flowspec_resolve_rte(rte *r, struct channel *c) } static inline int -rt_next_hop_update_net(rtable *tab, net *n) +rt_next_hop_update_net(struct rtable_private *tab, net *n) { - struct rte_storage *new; - int count = 0; + uint count = 0; int is_flow = net_is_flow(n->n.addr); struct rte_storage *old_best = n->routes; @@ -3466,49 +3557,90 @@ rt_next_hop_update_net(rtable *tab, net *n) return 0; for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) - if (is_flow || rta_next_hop_outdated(e->rte.attrs)) - count++; + count++; if (!count) return 0; struct rte_multiupdate { - struct rte_storage *old, *new; - } *updates = alloca(sizeof(struct rte_multiupdate) * count); + struct rte_storage *old, *new_stored; + rte new; + } *updates = tmp_allocz(sizeof(struct rte_multiupdate) * (count+1)); - int pos = 0; + struct rt_pending_export *last_pending = n->last; + + uint pos = 0; for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) - if (is_flow || rta_next_hop_outdated(e->rte.attrs)) - { - struct rte_storage *new = is_flow - ? rt_flowspec_update_rte(tab, n, &e->rte) - : rt_next_hop_update_rte(tab, n, &e->rte); + updates[pos++].old = e; + + /* This is an exceptional place where table can be unlocked while keeping its data: + * the reason why this is safe is that NHU must be always run from the same + * thread as cleanup routines, therefore the only real problem may arise when + * some importer does a change on this particular net (destination) while NHU + * is being computed. Statistically, this should almost never happen. In such + * case, we just drop all the computed changes and do it once again. + * */ + RT_UNLOCK(tab); + + uint mod = 0; + if (is_flow) + for (uint i = 0; i < pos; i++) + mod += rt_flowspec_update_rte(RT_PUB(tab), &updates[i].old->rte, &updates[i].new); - if (!new) - continue; + else + for (uint i = 0; i < pos; i++) + mod += rt_next_hop_update_rte(&updates[i].old->rte, &updates[i].new); - /* Call a pre-comparison hook */ - /* Not really an efficient way to compute this */ - if (e->rte.src->owner->rte_recalculate) - e->rte.src->owner->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); + RT_LOCK(RT_PUB(tab)); - updates[pos++] = (struct rte_multiupdate) { - .old = e, - .new = new, - }; + if (!mod) + return 0; + + /* Something has changed inbetween, retry NHU. */ + if (last_pending != n->last) + return rt_next_hop_update_net(tab, n); + + /* Now we reconstruct the original linked list */ + struct rte_storage **nptr = &n->routes; + for (uint i = 0; i < pos; i++) + { + updates[i].old->next = NULL; - /* Replace the route in the list */ - new->next = e->next; - *k = e = new; + struct rte_storage *put; + if (updates[i].new.attrs) + put = updates[i].new_stored = rte_store(&updates[i].new, n, tab); + else + put = updates[i].old; + + *nptr = put; + nptr = &put->next; + } + *nptr = NULL; + /* Call the pre-comparison hooks */ + for (uint i = 0; i < pos; i++) + if (updates[i].new_stored) + { /* Get a new ID for the route */ - new->rte.lastmod = current_time(); - new->rte.id = hmap_first_zero(&tab->id_map); - hmap_set(&tab->id_map, new->rte.id); + updates[i].new_stored->rte.lastmod = current_time(); + updates[i].new_stored->rte.id = hmap_first_zero(&tab->id_map); + hmap_set(&tab->id_map, updates[i].new_stored->rte.id); + + /* Call a pre-comparison hook */ + /* Not really an efficient way to compute this */ + if (updates[i].old->rte.src->owner->rte_recalculate) + updates[i].old->rte.src->owner->rte_recalculate(tab, n, &updates[i].new_stored->rte, &updates[i].old->rte, &old_best->rte); } - ASSERT_DIE(pos <= count); - count = pos; +#if DEBUGGING + { + uint t = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + t++; + ASSERT_DIE(t == pos); + ASSERT_DIE(pos == count); + } +#endif /* Find the new best route */ struct rte_storage **new_best = NULL; @@ -3519,7 +3651,7 @@ rt_next_hop_update_net(rtable *tab, net *n) } /* Relink the new best route to the first position */ - new = *new_best; + struct rte_storage *new = *new_best; if (new != n->routes) { *new_best = new->next; @@ -3527,88 +3659,155 @@ rt_next_hop_update_net(rtable *tab, net *n) n->routes = new; } + uint total = 0; /* Announce the changes */ - for (int i=0; i<count; i++) + for (uint i=0; i<count; i++) { - _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old); + if (!updates[i].new_stored) + continue; + + _Bool nb = (new->rte.src == updates[i].new.src), ob = (i == 0); const char *best_indicator[2][2] = { { "autoupdated", "autoupdated [-best]" }, { "autoupdated [+best]", "autoupdated [best]" } }; - rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); - rte_announce(tab, n, updates[i].new, updates[i].old, new, old_best); + rt_rte_trace_in(D_ROUTES, updates[i].new.sender->req, &updates[i].new, best_indicator[nb][ob]); + rte_announce(tab, n, updates[i].new_stored, updates[i].old, new, old_best); + + total++; } - return count; + return total; } static void -rt_next_hop_update(rtable *tab) +rt_next_hop_update(void *_tab) { + RT_LOCKED((rtable *) _tab, tab) + { + + /* If called from an uncork hook, reset the state */ + if (tab->nhu_corked) + { + ASSERT_DIE(tab->nhu_state == 0); + tab->nhu_state = tab->nhu_corked; + tab->nhu_corked = 0; + rt_trace(tab, D_STATES, "Next hop updater uncorked"); + } + + if (!tab->nhu_state) + bug("Called NHU event for no reason in table %s", tab->name); + + /* Check corkedness */ + if (rt_cork_check(tab->nhu_event)) + { + rt_trace(tab, D_STATES, "Next hop updater corked"); + if ((tab->nhu_state & NHU_RUNNING) + && !EMPTY_LIST(tab->exporter.pending) + && !tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); + + tab->nhu_corked = tab->nhu_state; + tab->nhu_state = 0; + RT_RETURN(tab); + } + struct fib_iterator *fit = &tab->nhu_fit; int max_feed = 32; - if (tab->nhu_state == NHU_CLEAN) - return; - + /* Initialize a new run */ if (tab->nhu_state == NHU_SCHEDULED) - { - FIB_ITERATE_INIT(fit, &tab->fib); - tab->nhu_state = NHU_RUNNING; + { + FIB_ITERATE_INIT(fit, &tab->fib); + tab->nhu_state = NHU_RUNNING; - if (tab->flowspec_trie) - rt_flowspec_reset_trie(tab); - } + if (tab->flowspec_trie) + rt_flowspec_reset_trie(tab); + } + /* Walk the fib one net after another */ FIB_ITERATE_START(&tab->fib, fit, net, n) { if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); - return; + ev_schedule(tab->nhu_event); + RT_RETURN(tab); } + lp_state lps; + lp_save(tmp_linpool, &lps); max_feed -= rt_next_hop_update_net(tab, n); + lp_restore(tmp_linpool, &lps); } FIB_ITERATE_END; + /* Finished NHU, cleanup */ + rt_trace(tab, D_EVENTS, "NHU done, scheduling export timer"); + + if (!tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); + /* State change: * NHU_DIRTY -> NHU_SCHEDULED * NHU_RUNNING -> NHU_CLEAN */ - tab->nhu_state &= 1; + if ((tab->nhu_state &= NHU_SCHEDULED) == NHU_SCHEDULED) + ev_schedule(tab->nhu_event); - if (tab->nhu_state != NHU_CLEAN) - ev_schedule(tab->rt_event); + rt_unlock_table(tab); + + } } +void +rt_new_default_table(struct symbol *s) +{ + for (uint addr_type = 0; addr_type < NET_MAX; addr_type++) + if (s == new_config->def_tables[addr_type]) + { + s->table = rt_new_table(s, addr_type); + return; + } + + bug("Requested an unknown new default table: %s", s->name); +} struct rtable_config * -rt_new_table(struct symbol *s, uint addr_type) +rt_get_default_table(struct config *cf, uint addr_type) { - /* Hack that allows to 'redefine' the master table */ - if ((s->class == SYM_TABLE) && - (s->table == new_config->def_tables[addr_type]) && - ((addr_type == NET_IP4) || (addr_type == NET_IP6))) - return s->table; + struct symbol *ts = cf->def_tables[addr_type]; + if (!ts) + return NULL; + if (!ts->table) + rt_new_default_table(ts); + + return ts->table; +} + +struct rtable_config * +rt_new_table(struct symbol *s, uint addr_type) +{ struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config)); - cf_define_symbol(s, SYM_TABLE, table, c); + if (s == new_config->def_tables[addr_type]) + s->table = c; + else + cf_define_symbol(s, SYM_TABLE, table, c); + c->name = s->name; c->addr_type = addr_type; c->gc_threshold = 1000; c->gc_period = (uint) -1; /* set in rt_postconfig() */ - c->min_settle_time = 1 S; - c->max_settle_time = 20 S; c->cork_threshold.low = 128; c->cork_threshold.high = 512; + c->debug = new_config->table_debug; add_tail(&new_config->tables, &c->n); /* First table of each type is kept as default */ if (! new_config->def_tables[addr_type]) - new_config->def_tables[addr_type] = c; + new_config->def_tables[addr_type] = s; return c; } @@ -3622,8 +3821,9 @@ rt_new_table(struct symbol *s, uint addr_type) * configuration. */ void -rt_lock_table(rtable *r) +rt_lock_table_priv(struct rtable_private *r, const char *file, uint line) { + rt_trace(r, D_STATES, "Locked at %s:%d", file, line); r->use_count++; } @@ -3636,20 +3836,32 @@ rt_lock_table(rtable *r) * for deletion by configuration changes. */ void -rt_unlock_table(rtable *r) +rt_unlock_table_priv(struct rtable_private *r, const char *file, uint line) { + rt_trace(r, D_STATES, "Unlocked at %s:%d", file, line); if (!--r->use_count && r->deleted) - { - struct config *conf = r->deleted; + /* Schedule the delete event to finish this up */ + ev_send(&global_event_list, ev_new_init(r->rp, rt_delete, r)); +} - /* Delete the routing table by freeing its pool */ - rt_shutdown(r); - config_del_obstacle(conf); - } +static void +rt_delete(void *tab_) +{ + /* We assume that nobody holds the table reference now as use_count is zero. + * Anyway the last holder may still hold the lock. Therefore we lock and + * unlock it the last time to be sure that nobody is there. */ + struct rtable_private *tab = RT_LOCK((rtable *) tab_); + struct config *conf = tab->deleted; + + RT_UNLOCK(RT_PUB(tab)); + + rfree(tab->rp); + config_del_obstacle(conf); } + static void -rt_check_cork_low(rtable *tab) +rt_check_cork_low(struct rtable_private *tab) { if (!tab->cork_active) return; @@ -3659,27 +3871,25 @@ rt_check_cork_low(rtable *tab) tab->cork_active = 0; rt_cork_release(); - if (config->table_debug) - log(L_TRACE "%s: Uncorked", tab->name); + rt_trace(tab, D_STATES, "Uncorked"); } } static void -rt_check_cork_high(rtable *tab) +rt_check_cork_high(struct rtable_private *tab) { if (!tab->cork_active && tab->exporter.first && (tab->exporter.first->seq + tab->cork_threshold.high <= tab->exporter.next_seq)) { tab->cork_active = 1; rt_cork_acquire(); - if (config->table_debug) - log(L_TRACE "%s: Corked", tab->name); + rt_trace(tab, D_STATES, "Corked"); } } static int -rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) +rt_reconfigure(struct rtable_private *tab, struct rtable_config *new, struct rtable_config *old) { if ((new->addr_type != old->addr_type) || (new->sorted != old->sorted) || @@ -3687,10 +3897,18 @@ rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old return 0; DBG("\t%s: same\n", new->name); - new->table = tab; + new->table = RT_PUB(tab); tab->name = new->name; tab->config = new; + if (tab->hostcache) + tab->hostcache->req.trace_routes = new->debug; + + struct rt_table_export_hook *hook; node *n; + WALK_LIST2(hook, n, tab->exporter.e.hooks, h.n) + if (hook->h.req->export_one == rt_flowspec_export_one) + hook->h.req->trace_routes = new->debug; + tab->cork_threshold = new->cork_threshold; if (new->cork_threshold.high != old->cork_threshold.high) @@ -3731,19 +3949,32 @@ rt_commit(struct config *new, struct config *old) { WALK_LIST(o, old->tables) { - rtable *tab = o->table; + struct rtable_private *tab = RT_LOCK(o->table); + if (tab->deleted) + { + RT_UNLOCK(tab); continue; + } r = rt_find_table_config(new, o->name); if (r && !new->shutdown && rt_reconfigure(tab, r, o)) + { + RT_UNLOCK(tab); continue; + } DBG("\t%s: deleted\n", o->name); tab->deleted = old; config_add_obstacle(old); rt_lock_table(tab); + + if (tab->hostcache) + rt_stop_export(&tab->hostcache->req, NULL); + rt_unlock_table(tab); + + RT_UNLOCK(tab); } } @@ -3760,13 +3991,81 @@ rt_commit(struct config *new, struct config *old) static void rt_feed_done(struct rt_export_hook *c) { - c->event->hook = rt_export_hook; + c->event.hook = rt_export_hook; rt_set_export_state(c, TES_READY); rt_send_export_event(c); } +#define MAX_FEED_BLOCK 1024 +typedef struct { + uint cnt, pos; + union { + struct rt_pending_export *rpe; + struct { + rte **feed; + uint *start; + }; + }; +} rt_feed_block; + +static int +rt_prepare_feed(struct rt_table_export_hook *c, net *n, rt_feed_block *b) +{ + if (n->routes) + { + if (c->h.req->export_bulk) + { + uint cnt = rte_feed_count(n); + if (b->cnt && (b->cnt + cnt > MAX_FEED_BLOCK)) + return 0; + + if (!b->cnt) + { + b->feed = tmp_alloc(sizeof(rte *) * MAX(MAX_FEED_BLOCK, cnt)); + b->start = tmp_alloc(sizeof(uint) * ((cnt >= MAX_FEED_BLOCK) ? 2 : (MAX_FEED_BLOCK + 2 - cnt))); + } + + rte_feed_obtain(n, &b->feed[b->cnt], cnt); + b->start[b->pos++] = b->cnt; + b->cnt += cnt; + } + else if (b->pos == MAX_FEED_BLOCK) + return 0; + else + { + if (!b->pos) + b->rpe = tmp_alloc(sizeof(struct rt_pending_export) * MAX_FEED_BLOCK); + + b->rpe[b->pos++] = (struct rt_pending_export) { .new = n->routes, .new_best = n->routes }; + } + } + + rpe_mark_seen_all(&c->h, n->first, NULL); + return 1; +} + +static void +rt_process_feed(struct rt_table_export_hook *c, rt_feed_block *b) +{ + if (!b->pos) + return; + + if (c->h.req->export_bulk) + { + b->start[b->pos] = b->cnt; + for (uint p = 0; p < b->pos; p++) + { + rte **feed = &b->feed[b->start[p]]; + c->h.req->export_bulk(c->h.req, feed[0]->net, NULL, feed, b->start[p+1] - b->start[p]); + } + } + else + for (uint p = 0; p < b->pos; p++) + c->h.req->export_one(c->h.req, b->rpe[p].new->rte.net, &b->rpe[p]); +} + /** * rt_feed_by_fib - advertise all routes to a channel by walking a fib * @c: channel to be fed @@ -3779,61 +4078,73 @@ rt_feed_done(struct rt_export_hook *c) static void rt_feed_by_fib(void *data) { - struct rt_export_hook *c = data; - + struct rt_table_export_hook *c = data; struct fib_iterator *fit = &c->feed_fit; - int max_feed = 256; + rt_feed_block block = {}; - ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); - rtable *tab = SKIP_BACK(rtable, exporter, c->table); + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) + { FIB_ITERATE_START(&tab->fib, fit, net, n) { - if (max_feed <= 0) + if ((c->h.req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->h.req->addr)) + { + if (atomic_load_explicit(&c->h.export_state, memory_order_acquire) != TES_FEEDING) + return; + + if (!rt_prepare_feed(c, n, &block)) { FIB_ITERATE_PUT(fit); - rt_send_export_event(c); + RT_UNLOCK(tab); + rt_process_feed(c, &block); + rt_send_export_event(&c->h); return; } - - if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) - return; - - if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr)) - max_feed -= rt_feed_net(c, n); + } } FIB_ITERATE_END; + } - rt_feed_done(c); + rt_process_feed(c, &block); + rt_feed_done(&c->h); } static void rt_feed_by_trie(void *data) { - struct rt_export_hook *c = data; - rtable *tab = SKIP_BACK(rtable, exporter, c->table); + struct rt_table_export_hook *c = data; + rt_feed_block block = {}; + + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) + { ASSERT_DIE(c->walk_state); struct f_trie_walk_state *ws = c->walk_state; - int max_feed = 256; + ASSERT(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); - ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + do { + if (!c->walk_last.type) + continue; - net_addr addr; - while (trie_walk_next(ws, &addr)) - { - net *n = net_find(tab, &addr); + net *n = net_find(tab, &c->walk_last); if (!n) continue; - if ((max_feed -= rt_feed_net(c, n)) <= 0) - return; + if (atomic_load_explicit(&c->h.export_state, memory_order_acquire) != TES_FEEDING) + RT_RETURN(tab); - if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + if (!rt_prepare_feed(c, n, &block)) + { + RT_UNLOCK(tab); + rt_process_feed(c, &block); + rt_send_export_event(&c->h); return; + } } + while (trie_walk_next(ws, &c->walk_last)); rt_unlock_trie(tab, c->walk_lock); c->walk_lock = NULL; @@ -3841,70 +4152,59 @@ rt_feed_by_trie(void *data) mb_free(c->walk_state); c->walk_state = NULL; - rt_feed_done(c); + c->walk_last.type = 0; + + } + + rt_process_feed(c, &block); + rt_feed_done(&c->h); } static void rt_feed_equal(void *data) { - struct rt_export_hook *c = data; - rtable *tab = SKIP_BACK(rtable, exporter, c->table); + struct rt_table_export_hook *c = data; + rt_feed_block block = {}; + net *n; + + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) + { + ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->h.req->addr_mode == TE_ADDR_EQUAL); - ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL); + if (n = net_find(tab, c->h.req->addr)) + ASSERT_DIE(rt_prepare_feed(c, n, &block)); + } - net *n = net_find(tab, c->req->addr); if (n) - rt_feed_net(c, n); + rt_process_feed(c, &block); - rt_feed_done(c); + rt_feed_done(&c->h); } static void rt_feed_for(void *data) { - struct rt_export_hook *c = data; - rtable *tab = SKIP_BACK(rtable, exporter, c->table); - - ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR); - - net *n = net_route(tab, c->req->addr); - if (n) - rt_feed_net(c, n); - - rt_feed_done(c); -} - -static uint -rt_feed_net(struct rt_export_hook *c, net *n) -{ - uint count = 0; + struct rt_table_export_hook *c = data; + rt_feed_block block = {}; + net *n; - if (c->req->export_bulk) + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) { - count = rte_feed_count(n); - if (count) - { - rte **feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(n, feed, count); - c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); - } - } + ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->h.req->addr_mode == TE_ADDR_FOR); - else if (n->routes) - { - struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; - c->req->export_one(c->req, n->n.addr, &rpe); - count = 1; + if (n = net_route(tab, c->h.req->addr)) + ASSERT_DIE(rt_prepare_feed(c, n, &block)); } - for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL)) - rpe_mark_seen(c, rpe); + if (n) + rt_process_feed(c, &block); - return count; + rt_feed_done(&c->h); } + /* * Import table */ @@ -4030,7 +4330,41 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) } static void -rt_init_hostcache(rtable *tab) +hc_notify_dump_req(struct rt_export_request *req) +{ + debug(" Table %s (%p)\n", req->name, req); +} + +static void +hc_notify_export_one(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first) +{ + struct hostcache *hc = SKIP_BACK(struct hostcache, req, req); + + /* No interest in this update, mark seen only */ + if (ev_active(&hc->update) || !trie_match_net(hc->trie, net)) + { + rpe_mark_seen_all(req->hook, first, NULL); + return; + } + + /* This net may affect some hostentries, check the actual change */ + rte *o = RTE_VALID_OR_NULL(first->old_best); + struct rte_storage *new_best = first->new_best; + + RPE_WALK(first, rpe, NULL) + { + rpe_mark_seen(req->hook, rpe); + new_best = rpe->new_best; + } + + /* Yes, something has actually changed. Do the hostcache update. */ + if (o != RTE_VALID_OR_NULL(new_best)) + ev_schedule_work(&hc->update); +} + + +static void +rt_init_hostcache(struct rtable_private *tab) { struct hostcache *hc = mb_allocz(tab->rp, sizeof(struct hostcache)); init_list(&hc->hostentries); @@ -4042,11 +4376,26 @@ rt_init_hostcache(rtable *tab) hc->lp = lp_new(tab->rp); hc->trie = f_new_trie(hc->lp, 0); + hc->update = (event) { + .hook = rt_update_hostcache, + .data = tab, + }; + + hc->req = (struct rt_export_request) { + .name = mb_sprintf(tab->rp, "%s.hcu.notifier", tab->name), + .list = &global_work_list, + .trace_routes = tab->config->debug, + .dump_req = hc_notify_dump_req, + .export_one = hc_notify_export_one, + }; + + rt_table_export_start_locked(tab, &hc->req); + tab->hostcache = hc; } static void -rt_free_hostcache(rtable *tab) +rt_free_hostcache(struct rtable_private *tab) { struct hostcache *hc = tab->hostcache; @@ -4068,16 +4417,6 @@ rt_free_hostcache(rtable *tab) */ } -static void -rt_notify_hostcache(rtable *tab, net *net) -{ - if (tab->hcu_scheduled) - return; - - if (trie_match_net(tab->hostcache->trie, net->n.addr)) - rt_schedule_hcu(tab); -} - static int if_local_addr(ip_addr a, struct iface *i) { @@ -4108,7 +4447,7 @@ rt_get_igp_metric(const rte *rt) } static int -rt_update_hostentry(rtable *tab, struct hostentry *he) +rt_update_hostentry(struct rtable_private *tab, struct hostentry *he) { ea_list *old_src = he->src; int direct = 0; @@ -4174,9 +4513,24 @@ done: } static void -rt_update_hostcache(rtable *tab) +rt_update_hostcache(void *data) { + rtable **nhu_pending; + + RT_LOCKED((rtable *) data, tab) + { + struct hostcache *hc = tab->hostcache; + + if (rt_cork_check(&hc->update)) + { + rt_trace(tab, D_STATES, "Hostcache update corked"); + RT_RETURN(tab); + } + + /* Destination schedule map */ + nhu_pending = tmp_allocz(sizeof(rtable *) * rtable_max_id); + struct hostentry *he; node *n, *x; @@ -4194,14 +4548,18 @@ rt_update_hostcache(rtable *tab) } if (rt_update_hostentry(tab, he)) - rt_schedule_nhu(he->tab); + nhu_pending[he->tab->id] = he->tab; } + } - tab->hcu_scheduled = 0; + for (uint i=0; i<rtable_max_id; i++) + if (nhu_pending[i]) + RT_LOCKED(nhu_pending[i], dst) + rt_schedule_nhu(dst); } static struct hostentry * -rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) +rt_get_hostentry(struct rtable_private *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; @@ -27,6 +27,7 @@ struct protocol; struct proto; struct channel; struct rte_src; +struct hostcache; struct symbol; struct timer; struct filter; @@ -52,31 +53,37 @@ struct rt_cork_threshold { struct rtable_config { node n; char *name; - struct rtable *table; + union rtable *table; struct proto_config *krt_attached; /* Kernel syncer attached to this table */ uint addr_type; /* Type of address data stored in table (NET_*) */ uint gc_threshold; /* Maximum number of operations before GC is run */ uint gc_period; /* Approximate time between two consecutive GC runs */ byte sorted; /* Routes of network are sorted according to rte_better() */ byte trie_used; /* Rtable has attached trie */ - btime min_settle_time; /* Minimum settle time for notifications */ - btime max_settle_time; /* Maximum settle time for notifications */ + byte debug; /* Whether to log */ btime export_settle_time; /* Delay before exports are announced */ struct rt_cork_threshold cork_threshold; /* Cork threshold values */ }; struct rt_export_hook; struct rt_export_request; +struct rt_exporter; + +struct rt_exporter_class { + void (*start)(struct rt_exporter *, struct rt_export_request *); + void (*stop)(struct rt_export_hook *); + void (*done)(void *_rt_export_hook); +}; struct rt_exporter { + const struct rt_exporter_class *class; + pool *rp; list hooks; /* Registered route export hooks */ uint addr_type; /* Type of address data exported (NET_*) */ +}; - struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *); - void (*stop)(struct rt_export_hook *); - void (*done)(struct rt_export_hook *); - void (*used)(struct rt_exporter *); - +struct rt_table_exporter { + struct rt_exporter e; list pending; /* List of packed struct rt_pending_export */ struct timer *export_timer; @@ -84,39 +91,50 @@ struct rt_exporter { u64 next_seq; /* The next export will have this ID */ }; -typedef struct rtable { - resource r; - node n; /* Node in list of all tables */ +extern uint rtable_max_id; + +DEFINE_DOMAIN(rtable); + +/* The public part of rtable structure */ +#define RTABLE_PUBLIC \ + resource r; \ + node n; /* Node in list of all tables */ \ + char *name; /* Name of this table */ \ + uint addr_type; /* Type of address data stored in table (NET_*) */ \ + uint id; /* Integer table ID for fast lookup */ \ + DOMAIN(rtable) lock; /* Lock to take to access the private parts */ \ + struct rtable_config *config; /* Configuration of this table */ \ + +/* The complete rtable structure */ +struct rtable_private { + /* Once more the public part */ + RTABLE_PUBLIC; + + /* Here the private items not to be accessed without locking */ pool *rp; /* Resource pool to allocate everything from, including itself */ struct slab *rte_slab; /* Slab to allocate route objects */ struct fib fib; struct f_trie *trie; /* Trie of prefixes defined in fib */ - char *name; /* Name of this table */ - uint addr_type; /* Type of address data stored in table (NET_*) */ int use_count; /* Number of protocols using this table */ u32 rt_count; /* Number of routes in the table */ list imports; /* Registered route importers */ - struct rt_exporter exporter; /* Exporter API structure */ + struct rt_table_exporter exporter; /* Exporter API structure */ struct hmap id_map; struct hostcache *hostcache; - struct rtable_config *config; /* Configuration of this table */ struct config *deleted; /* Table doesn't exist in current configuration, * delete as soon as use_count becomes 0 and remove * obstacle from this routing table. */ struct event *rt_event; /* Routing table event */ - struct event *uncork_event; /* Called when uncork happens */ + struct event *nhu_event; /* Specific event for next hop update */ struct timer *prune_timer; /* Timer for periodic pruning / GC */ btime last_rt_change; /* Last time when route changed */ - btime base_settle_time; /* Start time of rtable settling interval */ btime gc_time; /* Time of last GC */ uint gc_counter; /* Number of operations since last GC */ byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ byte prune_trie; /* Prune prefix trie during next table prune */ - byte hcu_scheduled; /* Hostcache update is scheduled */ - byte hcu_corked; /* Hostcache update is corked with this state */ byte nhu_state; /* Next Hop Update state */ byte nhu_corked; /* Next Hop Update is corked with this state */ byte export_used; /* Pending Export pruning is scheduled */ @@ -130,25 +148,28 @@ typedef struct rtable { u32 trie_old_lock_count; /* Old prefix trie locked by walks */ struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ - list subscribers; /* Subscribers for notifications */ - struct timer *settle_timer; /* Settle time for notifications */ - list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */ struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */ +}; + +/* The final union private-public rtable structure */ +typedef union rtable { + struct { + RTABLE_PUBLIC; + }; + struct rtable_private priv; } rtable; -struct rt_subscription { - node n; - rtable *tab; - event *event; - event_list *list; -}; +#define RT_IS_LOCKED(tab) DOMAIN_IS_LOCKED(rtable, (tab)->lock) -struct rt_flowspec_link { - node n; - rtable *src; - rtable *dst; - u32 uc; -}; +#define RT_LOCK(tab) ({ LOCK_DOMAIN(rtable, (tab)->lock); &(tab)->priv; }) +#define RT_UNLOCK(tab) UNLOCK_DOMAIN(rtable, (tab)->lock) +#define RT_PRIV(tab) ({ ASSERT_DIE(RT_IS_LOCKED((tab))); &(tab)->priv; }) +#define RT_PUB(tab) SKIP_BACK(rtable, priv, tab) + +#define RT_LOCKED(tpub, tpriv) for (struct rtable_private *tpriv = RT_LOCK(tpub); tpriv; RT_UNLOCK(tpriv), (tpriv = NULL)) +#define RT_RETURN(tpriv, ...) do { RT_UNLOCK(tpriv); return __VA_ARGS__; } while (0) + +#define RT_PRIV_SAME(tpriv, tpub) (&(tpub)->priv == (tpriv)) extern struct rt_cork { _Atomic uint active; @@ -184,43 +205,12 @@ static inline int rt_cork_check(event *e) } -#define NHU_CLEAN 0 -#define NHU_SCHEDULED 1 -#define NHU_RUNNING 2 -#define NHU_DIRTY 3 - typedef struct network { struct rte_storage *routes; /* Available routes for this network */ struct rt_pending_export *first, *last; struct fib_node n; /* FIB flags reserved for kernel syncer */ } net; -struct hostcache { - slab *slab; /* Slab holding all hostentries */ - struct hostentry **hash_table; /* Hash table for hostentries */ - unsigned hash_order, hash_shift; - unsigned hash_max, hash_min; - unsigned hash_items; - linpool *lp; /* Linpool for trie */ - struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ - list hostentries; /* List of all hostentries */ - byte update_hostcache; -}; - -struct hostentry { - node ln; - ip_addr addr; /* IP address of host, part of key */ - ip_addr link; /* (link-local) IP address of host, used as gw - if host is directly attached */ - struct rtable *tab; /* Dependent table, part of key */ - struct hostentry *next; /* Next in hash chain */ - unsigned hash_key; /* Hash key */ - unsigned uc; /* Use count */ - ea_list *src; /* Source attributes */ - byte nexthop_linkable; /* Nexthop list is completely non-device */ - u32 igp_metric; /* Chosen route IGP metric */ -}; - struct rte_storage { struct rte_storage *next; /* Next in chain */ struct rte rte; /* Route data */ @@ -238,6 +228,8 @@ struct rt_import_request { char *name; u8 trace_routes; + event_list *list; /* Where to schedule announce events */ + void (*dump_req)(struct rt_import_request *req); void (*log_state_change)(struct rt_import_request *req, u8 state); /* Preimport is called when the @new route is just-to-be inserted, replacing @old. @@ -269,6 +261,7 @@ struct rt_import_hook { u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ + event announce_event; /* This event announces table updates */ }; struct rt_pending_export { @@ -314,29 +307,44 @@ struct rt_export_hook { u32 withdraws_received; /* Number of route withdraws received */ } stats; + btime last_state_change; /* Time of last state transition */ + + _Atomic u8 export_state; /* Route export state (TES_*, see below) */ + struct event event; /* Event running all the export operations */ + + struct bmap seq_map; /* Keep track which exports were already procesed */ + + void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ +}; + +struct rt_table_export_hook { + union { + struct rt_export_hook h; + struct { /* Overriding the parent structure beginning */ + node _n; + struct rt_table_exporter *table; + }; + }; + union { struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ struct { struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ struct f_trie *walk_lock; /* Locked trie for walking */ + union { /* Last net visited but not processed */ + net_addr walk_last; + net_addr_ip4 walk_last_ip4; + net_addr_ip6 walk_last_ip6; + }; }; - u32 hash_iter; /* Iterator over hash */ }; - struct bmap seq_map; /* Keep track which exports were already procesed */ - - struct rt_pending_export * _Atomic last_export;/* Last export processed */ + struct rt_pending_export *_Atomic last_export;/* Last export processed */ struct rt_pending_export *rpe_next; /* Next pending export to process */ - btime last_state_change; /* Time of last state transition */ - u8 refeed_pending; /* Refeeding and another refeed is scheduled */ - _Atomic u8 export_state; /* Route export state (TES_*, see below) */ u8 feed_type; /* Which feeding method is used (TFT_*, see below) */ - struct event *event; /* Event running all the export operations */ - - void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ }; #define TIS_DOWN 0 @@ -365,7 +373,8 @@ struct rt_export_hook { #define TFT_HASH 3 void rt_request_import(rtable *tab, struct rt_import_request *req); -void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req); +void rt_request_export(rtable *tab, struct rt_export_request *req); +void rt_request_export_other(struct rt_exporter *tab, struct rt_export_request *req); void rt_export_once(struct rt_exporter *tab, struct rt_export_request *req); @@ -382,15 +391,35 @@ void rt_set_export_state(struct rt_export_hook *hook, u8 state); void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); +/* + * For table export processing + */ + /* Get next rpe. If src is given, it must match. */ struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src); +/* Walk all rpe's */ +#define RPE_WALK(first, it, src) \ + for (struct rt_pending_export *it = (first); it; it = rpe_next(it, (src))) + /* Mark the pending export processed */ void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); +#define rpe_mark_seen_all(hook, first, src) \ + RPE_WALK((first), _rpe, (src)) rpe_mark_seen((hook), _rpe) + /* Get pending export seen status */ int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); +/* + * For rt_export_hook and rt_exporter inheritance + */ + +void rt_init_export(struct rt_exporter *re, struct rt_export_hook *hook); +struct rt_export_hook *rt_alloc_export(struct rt_exporter *re, uint size); +void rt_export_stopped(struct rt_export_hook *hook); +void rt_exporter_init(struct rt_exporter *re); + /* Types of route announcement, also used as flags */ #define RA_UNDEF 0 /* Undefined RA type */ #define RA_OPTIMAL 1 /* Announcement of optimal route change */ @@ -404,6 +433,49 @@ int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); #define RIC_REJECT -1 /* Rejected by protocol */ #define RIC_DROP -2 /* Silently dropped by protocol */ +/* + * Next hop update data structures + */ + +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 + +struct hostentry { + node ln; + ip_addr addr; /* IP address of host, part of key */ + ip_addr link; /* (link-local) IP address of host, used as gw + if host is directly attached */ + rtable *tab; /* Dependent table, part of key */ + struct hostentry *next; /* Next in hash chain */ + unsigned hash_key; /* Hash key */ + unsigned uc; /* Use count */ + ea_list *src; /* Source attributes */ + byte nexthop_linkable; /* Nexthop list is completely non-device */ + u32 igp_metric; /* Chosen route IGP metric */ +}; + +struct hostcache { + slab *slab; /* Slab holding all hostentries */ + struct hostentry **hash_table; /* Hash table for hostentries */ + unsigned hash_order, hash_shift; + unsigned hash_max, hash_min; + unsigned hash_items; + linpool *lp; /* Linpool for trie */ + struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ + list hostentries; /* List of all hostentries */ + event update; + struct rt_export_request req; /* Notifier */ +}; + +struct rt_flowspec_link { + rtable *src; + rtable *dst; + u32 uc; + struct rt_export_request req; +}; + #define rte_update channel_rte_import /** * rte_update - enter a new update to a routing table @@ -446,32 +518,36 @@ void rt_init(void); void rt_preconfig(struct config *); void rt_postconfig(struct config *); void rt_commit(struct config *new, struct config *old); -void rt_lock_table(rtable *); -void rt_unlock_table(rtable *); -struct f_trie * rt_lock_trie(rtable *tab); -void rt_unlock_trie(rtable *tab, struct f_trie *trie); -void rt_subscribe(rtable *tab, struct rt_subscription *s); -void rt_unsubscribe(struct rt_subscription *s); +void rt_lock_table_priv(struct rtable_private *, const char *file, uint line); +void rt_unlock_table_priv(struct rtable_private *, const char *file, uint line); +static inline void rt_lock_table_pub(rtable *t, const char *file, uint line) +{ RT_LOCKED(t, tt) rt_lock_table_priv(tt, file, line); } +static inline void rt_unlock_table_pub(rtable *t, const char *file, uint line) +{ RT_LOCKED(t, tt) rt_unlock_table_priv(tt, file, line); } + +#define rt_lock_table(t) _Generic((t), rtable *: rt_lock_table_pub, \ + struct rtable_private *: rt_lock_table_priv)((t), __FILE__, __LINE__) +#define rt_unlock_table(t) _Generic((t), rtable *: rt_unlock_table_pub, \ + struct rtable_private *: rt_unlock_table_priv)((t), __FILE__, __LINE__) + +struct f_trie * rt_lock_trie(struct rtable_private *tab); +void rt_unlock_trie(struct rtable_private *tab, struct f_trie *trie); void rt_flowspec_link(rtable *src, rtable *dst); void rt_flowspec_unlink(rtable *src, rtable *dst); rtable *rt_setup(pool *, struct rtable_config *); -static inline void rt_shutdown(rtable *r) { rfree(r->rp); } -static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } -static inline net *net_find_valid(rtable *tab, const net_addr *addr) +static inline net *net_find(struct rtable_private *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_find_valid(struct rtable_private *tab, const net_addr *addr) { net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; } -static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } -net *net_get(rtable *tab, const net_addr *addr); -net *net_route(rtable *tab, const net_addr *n); +static inline net *net_get(struct rtable_private *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +net *net_route(struct rtable_private *tab, const net_addr *n); int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); void rt_refresh_begin(struct rt_import_request *); void rt_refresh_end(struct rt_import_request *); void rt_modify_stale(rtable *t, struct rt_import_request *); -void rt_schedule_prune(rtable *t); +void rt_schedule_prune(struct rtable_private *t); void rte_dump(struct rte_storage *); -void rte_free(struct rte_storage *); -struct rte_storage *rte_store(const rte *, net *net, rtable *); void rt_dump(rtable *); void rt_dump_all(void); void rt_dump_hooks(rtable *); @@ -481,6 +557,8 @@ void rt_reload_channel_abort(struct channel *c); void rt_refeed_channel(struct channel *c); void rt_prune_sync(rtable *t, int all); struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); +void rt_new_default_table(struct symbol *s); +struct rtable_config *rt_get_default_table(struct config *cf, uint addr_type); static inline int rt_is_ip(rtable *tab) { return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); } @@ -530,7 +608,7 @@ struct rt_show_data { void rt_show(struct rt_show_data *); struct rt_show_data_rtable * rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name); -struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, struct rtable *t); +struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t); /* Value of table definition mode in struct rt_show_data */ #define RSD_TDB_DEFAULT 0 /* no table specified */ @@ -557,7 +635,7 @@ struct hostentry_adata { }; void -ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); +ea_set_hostentry(ea_list **to, rtable *dep, rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); void ea_show_hostentry(const struct adata *ad, byte *buf, uint size); void ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad); |