author     Maria Matejka <mq@ucw.cz>    2022-10-04 16:09:41 +0200
committer  Maria Matejka <mq@ucw.cz>    2022-10-04 16:09:41 +0200
commit     f69ba3921a1842f9cac9b9fbd0a32800615da02e (patch)
tree       25d3b5b4ef7ffc4fb9937d6f48806934f4da2b62 /nest
parent     a414ba6b975a1187a59cac1f58bc68e5e4b7d37d (diff)
parent     fb7fb6744582b2bb74b3b1e32696bd5534e93054 (diff)
Merge commit 'fb7fb674' into HEAD
Diffstat (limited to 'nest')
-rw-r--r--  nest/config.Y      17
-rw-r--r--  nest/proto.c      144
-rw-r--r--  nest/protocol.h    15
-rw-r--r--  nest/rt-show.c     17
-rw-r--r--  nest/rt-table.c  1696
-rw-r--r--  nest/rt.h         264
6 files changed, 1337 insertions, 816 deletions
diff --git a/nest/config.Y b/nest/config.Y
index 91147a29..84c76ae9 100644
--- a/nest/config.Y
+++ b/nest/config.Y
@@ -227,14 +227,13 @@ table_opt:
cf_error("Trie option not supported for %s table", net_label[this_table->addr_type]);
this_table->trie_used = $2;
}
- | MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; }
- | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; }
| GC THRESHOLD expr { this_table->gc_threshold = $3; }
| GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); }
| CORK THRESHOLD expr expr {
if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold.");
this_table->cork_threshold.low = $3;
this_table->cork_threshold.high = $4; }
+ | DEBUG bool { this_table->debug = $2; }
;
table_opts:
@@ -322,6 +321,8 @@ channel_item_:
| RECEIVE LIMIT limit_spec { this_channel->rx_limit = $3; }
| IMPORT LIMIT limit_spec { this_channel->in_limit = $3; }
| EXPORT LIMIT limit_spec { this_channel->out_limit = $3; }
+ | MIN SETTLE TIME expr_us { this_channel->min_settle_time = $4; }
+ | MAX SETTLE TIME expr_us { this_channel->max_settle_time = $4; }
| PREFERENCE expr { this_channel->preference = $2; check_u16($2); }
| IMPORT KEEP FILTERED bool {
if ($4)
@@ -361,7 +362,11 @@ channel_end:
proto_channel: channel_start channel_opt_list channel_end;
-rtable: CF_SYM_KNOWN { cf_assert_symbol($1, SYM_TABLE); $$ = $1->table; } ;
+rtable: CF_SYM_KNOWN {
+ cf_assert_symbol($1, SYM_TABLE);
+ if (!$1->table) rt_new_default_table($1);
+ $$ = $1->table;
+} ;
imexport:
FILTER filter { $$ = $2; }
@@ -390,7 +395,7 @@ debug_default:
DEBUG PROTOCOLS debug_mask { new_config->proto_default_debug = $3; }
| DEBUG CHANNELS debug_mask { new_config->channel_default_debug = $3; }
| DEBUG COMMANDS expr { new_config->cli_debug = $3; }
- | DEBUG TABLES bool { new_config->table_debug = $3; }
+ | DEBUG TABLES debug_mask { new_config->table_debug = $3; }
;
/* MRTDUMP PROTOCOLS is in sysdep/unix/config.Y */
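Taken together, the config.Y changes above move the ROA settle times from
table options to per-channel options and make table debugging configurable.
A hedged configuration sketch (option spellings follow the grammar above;
the table name, protocol name and values are illustrative only):

    debug tables all;                # global default is now a debug mask

    ipv4 table master4 {
        debug on;                    # new per-table boolean switch
    }

    protocol bgp peer1 {
        # ... session options elided ...
        ipv4 {
            table master4;
            min settle time 1 s;     # moved here from table options;
            max settle time 20 s;    # defaults set in channel_config_new()
        };
    }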
@@ -683,6 +688,7 @@ r_args:
}
| r_args TABLE symbol_known {
cf_assert_symbol($3, SYM_TABLE);
+ if (!$3->table) cf_error("Table %s not configured", $3->name);
$$ = $1;
rt_show_add_table($$, $3->table->table);
$$->tables_defined_by = RSD_TDB_DIRECT;
@@ -696,7 +702,8 @@ r_args:
}
| r_args IMPORT TABLE channel_arg {
if (!($4->in_keep & RIK_PREFILTER)) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name);
- rt_show_add_exporter($$, &$4->table->exporter, "import")->prefilter = $4;
+ RT_LOCKED($4->table, tab)
+ rt_show_add_exporter($$, &tab->exporter.e, "import")->prefilter = $4;
$$->tables_defined_by = RSD_TDB_DIRECT;
}
| r_args EXPORT TABLE channel_arg {
diff --git a/nest/proto.c b/nest/proto.c
index 853b1cf9..783a936c 100644
--- a/nest/proto.c
+++ b/nest/proto.c
@@ -55,6 +55,7 @@ static void channel_update_limit(struct channel *c, struct limit *l, int dir, st
static void channel_reset_limit(struct channel *c, struct limit *l, int dir);
static void channel_feed_end(struct channel *c);
static void channel_export_stopped(struct rt_export_request *req);
+static void channel_check_stopped(struct channel *c);
static inline int proto_is_done(struct proto *p)
{ return (p->proto_state == PS_DOWN) && proto_is_inactive(p); }
@@ -168,7 +169,7 @@ proto_cf_find_channel(struct proto_config *pc, uint net_type)
* Returns pointer to channel or NULL
*/
struct channel *
-proto_find_channel_by_table(struct proto *p, struct rtable *t)
+proto_find_channel_by_table(struct proto *p, rtable *t)
{
struct channel *c;
@@ -312,10 +313,19 @@ proto_remove_channels(struct proto *p)
proto_remove_channel(p, c);
}
+struct roa_subscription {
+ node roa_node;
+ timer t;
+ btime base_settle_time; /* Start of settling interval */
+ struct channel *c;
+ struct rt_export_request req;
+};
+
static void
-channel_roa_in_changed(void *_data)
+channel_roa_in_changed(struct timer *t)
{
- struct channel *c = _data;
+ struct roa_subscription *s = SKIP_BACK(struct roa_subscription, t, t);
+ struct channel *c = s->c;
int active = !!c->reload_req.hook;
CD(c, "Reload triggered by RPKI change%s", active ? " - already active" : "");
@@ -327,9 +337,11 @@ channel_roa_in_changed(void *_data)
}
static void
-channel_roa_out_changed(void *_data)
+channel_roa_out_changed(struct timer *t)
{
- struct channel *c = _data;
+ struct roa_subscription *s = SKIP_BACK(struct roa_subscription, t, t);
+ struct channel *c = s->c;
+
CD(c, "Feeding triggered by RPKI change");
c->refeed_pending = 1;
@@ -338,29 +350,57 @@ channel_roa_out_changed(void *_data)
rt_stop_export(&c->out_req, channel_export_stopped);
}
-/* Temporary code, subscriptions should be changed to resources */
-struct roa_subscription {
- struct rt_subscription s;
- node roa_node;
-};
+static void
+channel_export_one_roa(struct rt_export_request *req, const net_addr *net UNUSED, struct rt_pending_export *first)
+{
+ struct roa_subscription *s = SKIP_BACK(struct roa_subscription, req, req);
+
+ /* TODO: use the information about what roa has changed */
+
+ if (!tm_active(&s->t))
+ {
+ s->base_settle_time = current_time();
+ tm_start(&s->t, s->base_settle_time + s->c->min_settle_time);
+ }
+ else
+ tm_set(&s->t,
+ MIN(s->base_settle_time + s->c->max_settle_time,
+ current_time() + s->c->min_settle_time));
+
+
+ rpe_mark_seen_all(req->hook, first, NULL);
+}
+
+static void
+channel_dump_roa_req(struct rt_export_request *req)
+{
+ struct roa_subscription *s = SKIP_BACK(struct roa_subscription, req, req);
+ struct channel *c = s->c;
+ struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter.e, req->hook->table);
+
+ debug(" Channel %s.%s ROA %s change notifier from table %s request %p\n",
+ c->proto->name, c->name,
+ (s->t.hook == channel_roa_in_changed) ? "import" : "export",
+ tab->name, req);
+}
static int
channel_roa_is_subscribed(struct channel *c, rtable *tab, int dir)
{
- void (*hook)(void *) =
+ void (*hook)(struct timer *) =
dir ? channel_roa_in_changed : channel_roa_out_changed;
struct roa_subscription *s;
node *n;
WALK_LIST2(s, n, c->roa_subscriptions, roa_node)
- if ((s->s.tab == tab) && (s->s.event->hook == hook))
+ if ((tab == SKIP_BACK(rtable, priv.exporter.e, s->req.hook->table))
+ && (s->t.hook == hook))
return 1;
return 0;
}
-
static void
channel_roa_subscribe(struct channel *c, rtable *tab, int dir)
{
@@ -368,28 +408,47 @@ channel_roa_subscribe(struct channel *c, rtable *tab, int dir)
return;
struct roa_subscription *s = mb_allocz(c->proto->pool, sizeof(struct roa_subscription));
- s->s.event = ev_new_init(c->proto->pool, dir ? channel_roa_in_changed : channel_roa_out_changed, c);
- s->s.list = proto_work_list(c->proto);
- rt_subscribe(tab, &s->s);
+ *s = (struct roa_subscription) {
+ .t = { .hook = dir ? channel_roa_in_changed : channel_roa_out_changed, },
+ .c = c,
+ .req = {
+ .name = mb_sprintf(c->proto->pool, "%s.%s.roa-%s.%s",
+ c->proto->name, c->name, dir ? "in" : "out", tab->name),
+ .list = proto_work_list(c->proto),
+ .trace_routes = c->debug | c->proto->debug,
+ .dump_req = channel_dump_roa_req,
+ .export_one = channel_export_one_roa,
+ },
+ };
add_tail(&c->roa_subscriptions, &s->roa_node);
+ rt_request_export(tab, &s->req);
}
static void
-channel_roa_unsubscribe(struct roa_subscription *s)
+channel_roa_unsubscribed(struct rt_export_request *req)
{
- rt_unsubscribe(&s->s);
+ struct roa_subscription *s = SKIP_BACK(struct roa_subscription, req, req);
+ struct channel *c = s->c;
+
rem_node(&s->roa_node);
- rfree(s->s.event);
mb_free(s);
+
+ channel_check_stopped(c);
+}
+
+static void
+channel_roa_unsubscribe(struct roa_subscription *s)
+{
+ rt_stop_export(&s->req, channel_roa_unsubscribed);
}
static void
channel_roa_subscribe_filter(struct channel *c, int dir)
{
const struct filter *f = dir ? c->in_filter : c->out_filter;
- struct rtable *tab;
+ rtable *tab;
int valid = 1, found = 0;
if ((f == FILTER_ACCEPT) || (f == FILTER_REJECT))
@@ -450,13 +509,10 @@ channel_start_import(struct channel *c)
return;
}
- int nlen = strlen(c->name) + strlen(c->proto->name) + 2;
- char *rn = mb_allocz(c->proto->pool, nlen);
- bsprintf(rn, "%s.%s", c->proto->name, c->name);
-
c->in_req = (struct rt_import_request) {
- .name = rn,
+ .name = mb_sprintf(c->proto->pool, "%s.%s", c->proto->name, c->name),
.trace_routes = c->debug | c->proto->debug,
+ .list = proto_work_list(c->proto),
.dump_req = channel_dump_import_req,
.log_state_change = channel_import_log_state_change,
.preimport = channel_preimport,
@@ -483,12 +539,9 @@ channel_start_export(struct channel *c)
}
ASSERT(c->channel_state == CS_UP);
- int nlen = strlen(c->name) + strlen(c->proto->name) + 2;
- char *rn = mb_allocz(c->proto->pool, nlen);
- bsprintf(rn, "%s.%s", c->proto->name, c->name);
c->out_req = (struct rt_export_request) {
- .name = rn,
+ .name = mb_sprintf(c->proto->pool, "%s.%s", c->proto->name, c->name),
.list = proto_work_list(c->proto),
.addr = c->out_subprefix,
.addr_mode = c->out_subprefix ? TE_ADDR_IN : TE_ADDR_NONE,
@@ -523,7 +576,7 @@ channel_start_export(struct channel *c)
}
DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req);
- rt_request_export(&c->table->exporter, &c->out_req);
+ rt_request_export(c->table, &c->out_req);
}
static void
@@ -532,7 +585,7 @@ channel_check_stopped(struct channel *c)
switch (c->channel_state)
{
case CS_STOP:
- if (c->out_req.hook || c->in_req.hook)
+ if (!EMPTY_LIST(c->roa_subscriptions) || c->out_req.hook || c->in_req.hook)
return;
channel_set_state(c, CS_DOWN);
@@ -540,7 +593,7 @@ channel_check_stopped(struct channel *c)
break;
case CS_PAUSE:
- if (c->out_req.hook)
+ if (!EMPTY_LIST(c->roa_subscriptions) || c->out_req.hook)
return;
channel_set_state(c, CS_START);
@@ -557,8 +610,6 @@ channel_import_stopped(struct rt_import_request *req)
{
struct channel *c = SKIP_BACK(struct channel, in_req, req);
- req->hook = NULL;
-
mb_free(c->in_req.name);
c->in_req.name = NULL;
@@ -577,7 +628,7 @@ channel_export_stopped(struct rt_export_request *req)
{
c->refeeding = 1;
c->refeed_pending = 0;
- rt_request_export(&c->table->exporter, req);
+ rt_request_export(c->table, req);
return;
}
@@ -621,7 +672,7 @@ channel_schedule_reload(struct channel *c)
{
ASSERT(c->in_req.hook);
- rt_request_export(&c->table->exporter, &c->reload_req);
+ rt_request_export(c->table, &c->reload_req);
}
static void
@@ -864,7 +915,7 @@ channel_config_new(const struct channel_class *cc, const char *name, uint net_ty
if (proto->net_type && (net_type != proto->net_type))
cf_error("Different channel type");
- tab = new_config->def_tables[net_type];
+ tab = rt_get_default_table(new_config, net_type);
}
if (!cc)
@@ -883,6 +934,9 @@ channel_config_new(const struct channel_class *cc, const char *name, uint net_ty
cf->debug = new_config->channel_default_debug;
cf->rpki_reload = 1;
+ cf->min_settle_time = 1 S;
+ cf->max_settle_time = 20 S;
+
add_tail(&proto->channels, &cf->n);
return cf;
@@ -963,6 +1017,22 @@ channel_reconfigure(struct channel *c, struct channel_config *cf)
c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug;
c->rpki_reload = cf->rpki_reload;
+ if ( (c->min_settle_time != cf->min_settle_time)
+ || (c->max_settle_time != cf->max_settle_time))
+ {
+ c->min_settle_time = cf->min_settle_time;
+ c->max_settle_time = cf->max_settle_time;
+
+ struct roa_subscription *s;
+ node *n;
+
+ WALK_LIST2(s, n, c->roa_subscriptions, roa_node)
+ if (tm_active(&s->t))
+ tm_set(&s->t,
+ MIN(s->base_settle_time + c->max_settle_time,
+ current_time() + c->min_settle_time));
+ }
+
/* Execute channel-specific reconfigure hook */
if (c->channel->reconfigure && !c->channel->reconfigure(c, cf, &import_changed, &export_changed))
return 0;
diff --git a/nest/protocol.h b/nest/protocol.h
index b4730126..c88598cc 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -18,7 +18,6 @@
struct iface;
struct ifa;
-struct rtable;
struct rte;
struct neighbor;
struct rta;
@@ -187,7 +186,7 @@ struct proto {
* rte_remove Called whenever a rte is removed from the routing table.
*/
- int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
+ int (*rte_recalculate)(struct rtable_private *, struct network *, struct rte *, struct rte *, struct rte *);
int (*rte_mergable)(struct rte *, struct rte *);
void (*rte_insert)(struct network *, struct rte *);
void (*rte_remove)(struct network *, struct rte *);
@@ -460,6 +459,9 @@ struct channel_config {
struct channel_limit in_limit; /* Limit for importing routes from protocol */
struct channel_limit out_limit; /* Limit for exporting routes to protocol */
+ btime min_settle_time; /* Minimum settle time for ROA-induced reload */
+ btime max_settle_time; /* Maximum settle time for ROA-induced reload */
+
u8 net_type; /* Routing table network type (NET_*), 0 for undefined */
u8 ra_mode; /* Mode of received route advertisements (RA_*) */
u16 preference; /* Default route preference */
@@ -476,7 +478,7 @@ struct channel {
const struct channel_class *channel;
struct proto *proto;
- struct rtable *table;
+ rtable *table;
const struct filter *in_filter; /* Input filter */
const struct filter *out_filter; /* Output filter */
const net_addr *out_subprefix; /* Export only subprefixes of this net */
@@ -487,6 +489,9 @@ struct channel {
struct limit in_limit; /* Input limit */
struct limit out_limit; /* Output limit */
+ btime min_settle_time; /* Minimum settle time for ROA-induced reload */
+ btime max_settle_time; /* Maximum settle time for ROA-induced reload */
+
u8 limit_actions[PLD_MAX]; /* Limit actions enum */
u8 limit_active; /* Flags for active limits */
@@ -540,7 +545,7 @@ struct channel {
struct rt_exporter *out_table; /* Internal table for exported routes */
- list roa_subscriptions; /* List of active ROA table subscriptions based on filters roa_check() */
+ list roa_subscriptions; /* List of active ROA table subscriptions based on filters' roa_check() calls */
};
#define RIK_REJECTED 1 /* Routes rejected in import filter are kept */
@@ -604,7 +609,7 @@ struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_ty
static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc)
{ return proto_cf_find_channel(pc, pc->net_type); }
-struct channel *proto_find_channel_by_table(struct proto *p, struct rtable *t);
+struct channel *proto_find_channel_by_table(struct proto *p, rtable *t);
struct channel *proto_find_channel_by_name(struct proto *p, const char *n);
struct channel *proto_add_channel(struct proto *p, struct channel_config *cf);
int proto_configure_channel(struct proto *p, struct channel **c, struct channel_config *cf);
diff --git a/nest/rt-show.c b/nest/rt-show.c
index 17400029..dc88047a 100644
--- a/nest/rt-show.c
+++ b/nest/rt-show.c
@@ -301,7 +301,7 @@ rt_show_cont(struct rt_show_data *d)
if (d->tables_defined_by & RSD_TDB_SET)
rt_show_table(d);
- rt_request_export(d->tab->table, &d->req);
+ rt_request_export_other(d->tab->table, &d->req);
}
static void
@@ -354,9 +354,11 @@ rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *
}
struct rt_show_data_rtable *
-rt_show_add_table(struct rt_show_data *d, struct rtable *t)
+rt_show_add_table(struct rt_show_data *d, rtable *t)
{
- struct rt_show_data_rtable *rsdr = rt_show_add_exporter(d, &t->exporter, t->name);
+ struct rt_show_data_rtable *rsdr;
+ RT_LOCKED(t, tp)
+ rsdr = rt_show_add_exporter(d, &tp->exporter.e, t->name);
struct proto_config *krt = t->config->krt_attached;
if (krt)
@@ -400,8 +402,8 @@ rt_show_get_default_tables(struct rt_show_data *d)
}
for (int i=1; i<NET_MAX; i++)
- if (config->def_tables[i] && config->def_tables[i]->table)
- rt_show_add_table(d, config->def_tables[i]->table);
+ if (config->def_tables[i] && config->def_tables[i]->table && config->def_tables[i]->table->table)
+ rt_show_add_table(d, config->def_tables[i]->table->table);
}
static inline void
@@ -418,12 +420,13 @@ rt_show_prepare_tables(struct rt_show_data *d)
/* Ensure there is defined export_channel for each table */
if (d->export_mode)
{
+ rtable *rt = SKIP_BACK(rtable, priv.exporter.e, tab->table);
if (!tab->export_channel && d->export_channel &&
- (tab->table == &d->export_channel->table->exporter))
+ (rt == d->export_channel->table))
tab->export_channel = d->export_channel;
if (!tab->export_channel && d->export_protocol)
- tab->export_channel = proto_find_channel_by_table(d->export_protocol, SKIP_BACK(rtable, exporter, tab->table));
+ tab->export_channel = proto_find_channel_by_table(d->export_protocol, rt);
if (!tab->export_channel)
{
diff --git a/nest/rt-table.c b/nest/rt-table.c
index 3ade4237..95248635 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -43,10 +43,10 @@
* all prefixes that may influence resolving of tracked next hops.
*
* When a best route changes in the src table, the hostcache is notified using
- * rt_notify_hostcache(), which immediately checks using the trie whether the
+ * an auxiliary export request, which checks using the trie whether the
* change is relevant and if it is, then it schedules asynchronous hostcache
* recomputation. The recomputation is done by rt_update_hostcache() (called
- * from rt_event() of src table), it walks through all hostentries and resolves
+ * as an event of src table), it walks through all hostentries and resolves
* them (by rt_update_hostentry()). It also updates the trie. If a change in
* hostentry resolution was found, then it schedules asynchronous nexthop
* recomputation of associated dst table. That is done by rt_next_hop_update()
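For illustration, the new notification path has roughly this shape: the
hostcache owns an auxiliary export request whose export_one handler filters
changes through the trie and defers the heavy lifting to an event. A minimal
sketch; the hostcache fields and the handler name are assumptions (the actual
hostcache wiring is outside this excerpt), and only the request/rpe calls are
taken from this patch:

    static void
    hc_notify_export_one(struct rt_export_request *req, const net_addr *net,
                         struct rt_pending_export *first)
    {
      /* Hypothetical container layout: the request embedded in the hostcache */
      struct hostcache *hc = SKIP_BACK(struct hostcache, req, req);

      /* Only changes matching the trie of tracked prefixes are relevant */
      if (trie_match_net(hc->trie, net))
        ev_schedule(&hc->update);   /* assumed event running rt_update_hostcache() */

      /* In any case, mark the pending exports as seen */
      rpe_mark_seen_all(req->hook, first, NULL);
    }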
@@ -60,15 +60,14 @@
* routes depends of resolving their network prefixes in IP routing tables. This
* is similar to the recursive next hop mechanism, but simpler as there are no
* intermediate hostcache and hostentries (because flows are less likely to
- * share common net prefix than routes sharing a common next hop). In src table,
- * there is a list of dst tables (list flowspec_links), this list is updated by
- * flowpsec channels (by rt_flowspec_link() and rt_flowspec_unlink() during
- * channel start/stop). Each dst table has its own trie of prefixes that may
- * influence validation of flowspec routes in it (flowspec_trie).
+ * share a common net prefix than routes sharing a common next hop). Every dst
+ * table has its own export request in every src table. Each dst table has its
+ * own trie of prefixes that may influence validation of flowspec routes in it
+ * (flowspec_trie).
*
- * When a best route changes in the src table, rt_flowspec_notify() immediately
- * checks all dst tables from the list using their tries to see whether the
- * change is relevant for them. If it is, then an asynchronous re-validation of
+ * When a best route changes in the src table, the notification mechanism is
+ * invoked by the export request which checks its dst table's trie to see
+ * whether the change is relevant, and if so, an asynchronous re-validation of
* flowspec routes in the dst table is scheduled. That is also done by function
* rt_next_hop_update(), like nexthop recomputation above. It iterates over all
* flowspec routes and re-validates them. It also recalculates the trie.
@@ -83,9 +82,8 @@
* will be re-validated later in this round anyway.
*
* The third mechanism is used for RPKI re-validation of IP routes and it is the
- * simplest. It is just a list of subscribers in src table, who are notified
- * when any change happened, but only after a settle time. Also, in RPKI case
- * the dst is not a table, but a channel, who refeeds routes through a filter.
+ * simplest. It is also an auxiliary export request belonging to the
+ * appropriate channel, triggering its reload/refeed timer after a settle time.
*/
#undef LOCAL_DEBUG
@@ -105,6 +103,7 @@
#include "lib/string.h"
#include "lib/alloca.h"
#include "lib/flowspec.h"
+#include "lib/idm.h"
#ifdef CONFIG_BGP
#include "proto/bgp/bgp.h"
@@ -129,27 +128,24 @@ struct rt_export_block {
struct rt_pending_export export[];
};
-static void rt_free_hostcache(rtable *tab);
-static void rt_notify_hostcache(rtable *tab, net *net);
-static void rt_update_hostcache(rtable *tab);
-static void rt_next_hop_update(rtable *tab);
+static void rt_free_hostcache(struct rtable_private *tab);
+static void rt_update_hostcache(void *tab);
+static void rt_next_hop_update(void *tab);
static inline void rt_next_hop_resolve_rte(rte *r);
static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c);
-static inline void rt_prune_table(rtable *tab);
-static inline void rt_schedule_notify(rtable *tab);
-static void rt_flowspec_notify(rtable *tab, net *net);
-static void rt_kick_prune_timer(rtable *tab);
+static inline void rt_prune_table(struct rtable_private *tab);
+static void rt_kick_prune_timer(struct rtable_private *tab);
static void rt_feed_by_fib(void *);
static void rt_feed_by_trie(void *);
static void rt_feed_equal(void *);
static void rt_feed_for(void *);
-static uint rt_feed_net(struct rt_export_hook *c, net *n);
-static void rt_check_cork_low(rtable *tab);
-static void rt_check_cork_high(rtable *tab);
+static void rt_check_cork_low(struct rtable_private *tab);
+static void rt_check_cork_high(struct rtable_private *tab);
static void rt_cork_release_hook(void *);
+static void rt_delete(void *);
-static inline void rt_export_used(struct rt_exporter *);
-static void rt_export_cleanup(rtable *tab);
+static void rt_export_used(struct rt_table_exporter *);
+static void rt_export_cleanup(struct rtable_private *tab);
static int rte_same(rte *x, rte *y);
@@ -185,13 +181,18 @@ const char *rt_export_state_name(u8 state)
return rt_export_state_name_array[state];
}
-static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old);
-static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep);
+static struct hostentry *rt_get_hostentry(struct rtable_private *tab, ip_addr a, ip_addr ll, rtable *dep);
+
+#define rt_trace(tab, level, fmt, args...) do {\
+ struct rtable_private *t = (tab); \
+ if (t->config->debug & (level)) \
+ log(L_TRACE "%s: " fmt, t->name, ##args); \
+} while (0)
static void
net_init_with_trie(struct fib *f, void *N)
{
- rtable *tab = SKIP_BACK(rtable, fib, f);
+ struct rtable_private *tab = SKIP_BACK(struct rtable_private, fib, f);
net *n = N;
if (tab->trie)
@@ -202,7 +203,7 @@ net_init_with_trie(struct fib *f, void *N)
}
static inline net *
-net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0)
+net_route_ip4_trie(struct rtable_private *t, const net_addr_ip4 *n0)
{
TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n)
{
@@ -216,7 +217,7 @@ net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0)
}
static inline net *
-net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0)
+net_route_vpn4_trie(struct rtable_private *t, const net_addr_vpn4 *n0)
{
TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px)
{
@@ -232,7 +233,7 @@ net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0)
}
static inline net *
-net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0)
+net_route_ip6_trie(struct rtable_private *t, const net_addr_ip6 *n0)
{
TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n)
{
@@ -246,7 +247,7 @@ net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0)
}
static inline net *
-net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0)
+net_route_vpn6_trie(struct rtable_private *t, const net_addr_vpn6 *n0)
{
TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
{
@@ -262,7 +263,7 @@ net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0)
}
static inline void *
-net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0)
+net_route_ip6_sadr_trie(struct rtable_private *t, const net_addr_ip6_sadr *n0)
{
TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
{
@@ -295,7 +296,7 @@ net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0)
}
static inline net *
-net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0)
+net_route_ip4_fib(struct rtable_private *t, const net_addr_ip4 *n0)
{
net_addr_ip4 n;
net_copy_ip4(&n, n0);
@@ -311,7 +312,7 @@ net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0)
}
static inline net *
-net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0)
+net_route_vpn4_fib(struct rtable_private *t, const net_addr_vpn4 *n0)
{
net_addr_vpn4 n;
net_copy_vpn4(&n, n0);
@@ -327,7 +328,7 @@ net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0)
}
static inline net *
-net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0)
+net_route_ip6_fib(struct rtable_private *t, const net_addr_ip6 *n0)
{
net_addr_ip6 n;
net_copy_ip6(&n, n0);
@@ -343,7 +344,7 @@ net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0)
}
static inline net *
-net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0)
+net_route_vpn6_fib(struct rtable_private *t, const net_addr_vpn6 *n0)
{
net_addr_vpn6 n;
net_copy_vpn6(&n, n0);
@@ -359,7 +360,7 @@ net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0)
}
static inline void *
-net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0)
+net_route_ip6_sadr_fib(struct rtable_private *t, const net_addr_ip6_sadr *n0)
{
net_addr_ip6_sadr n;
net_copy_ip6_sadr(&n, n0);
@@ -399,7 +400,7 @@ net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0)
}
net *
-net_route(rtable *tab, const net_addr *n)
+net_route(struct rtable_private *tab, const net_addr *n)
{
ASSERT(tab->addr_type == n->type);
@@ -442,7 +443,7 @@ net_route(rtable *tab, const net_addr *n)
static int
-net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn)
+net_roa_check_ip4_trie(struct rtable_private *tab, const net_addr_ip4 *px, u32 asn)
{
int anything = 0;
@@ -470,7 +471,7 @@ net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn)
}
static int
-net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn)
+net_roa_check_ip4_fib(struct rtable_private *tab, const net_addr_ip4 *px, u32 asn)
{
struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
struct fib_node *fn;
@@ -502,7 +503,7 @@ net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn)
}
static int
-net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn)
+net_roa_check_ip6_trie(struct rtable_private *tab, const net_addr_ip6 *px, u32 asn)
{
int anything = 0;
@@ -530,7 +531,7 @@ net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn)
}
static int
-net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn)
+net_roa_check_ip6_fib(struct rtable_private *tab, const net_addr_ip6 *px, u32 asn)
{
struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
struct fib_node *fn;
@@ -577,24 +578,30 @@ net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn)
* must have type NET_IP4 or NET_IP6, respectively.
*/
int
-net_roa_check(rtable *tab, const net_addr *n, u32 asn)
+net_roa_check(rtable *tp, const net_addr *n, u32 asn)
{
- if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
- {
- if (tab->trie)
- return net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn);
- else
- return net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn);
- }
- else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
+ int out = ROA_UNKNOWN;
+
+ RT_LOCKED(tp, tab)
{
- if (tab->trie)
- return net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn);
+ if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
+ {
+ if (tab->trie)
+ out = net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn);
+ else
+ out = net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn);
+ }
+ else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
+ {
+ if (tab->trie)
+ out = net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn);
+ else
+ out = net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn);
+ }
else
- return net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn);
+ out = ROA_UNKNOWN; /* Should not happen */
}
- else
- return ROA_UNKNOWN; /* Should not happen */
+ return out;
}
/**
@@ -618,7 +625,7 @@ rte_find(net *net, struct rte_src *src)
struct rte_storage *
-rte_store(const rte *r, net *net, rtable *tab)
+rte_store(const rte *r, net *net, struct rtable_private *tab)
{
struct rte_storage *e = sl_alloc(tab->rte_slab);
@@ -902,7 +909,7 @@ channel_rpe_mark_seen(struct rt_export_request *req, struct rt_pending_export *r
}
void
-rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe,
+rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first,
struct rte **feed, uint count)
{
struct channel *c = SKIP_BACK(struct channel, out_req, req);
@@ -946,7 +953,7 @@ rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_p
done:
/* Check obsolete routes for previously exported */
- while (rpe)
+ RPE_WALK(first, rpe, NULL)
{
channel_rpe_mark_seen(req, rpe);
if (rpe->old)
@@ -957,7 +964,6 @@ done:
old_best = &rpe->old->rte;
}
}
- rpe = rpe_next(rpe, NULL);
}
/* Nothing to export */
@@ -1030,7 +1036,7 @@ rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool
}
void
-rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe,
+rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first,
struct rte **feed, uint count)
{
struct channel *c = SKIP_BACK(struct channel, out_req, req);
@@ -1056,7 +1062,7 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen
}
/* Check obsolete routes for previously exported */
- while (rpe)
+ RPE_WALK(first, rpe, NULL)
{
channel_rpe_mark_seen(req, rpe);
if (rpe->old)
@@ -1067,7 +1073,6 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen
old_best = &rpe->old->rte;
}
}
- rpe = rpe_next(rpe, NULL);
}
/* Prepare new merged route */
@@ -1078,17 +1083,16 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen
}
void
-rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe)
+rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first)
{
struct channel *c = SKIP_BACK(struct channel, out_req, req);
- rte *o = RTE_VALID_OR_NULL(rpe->old_best);
- struct rte_storage *new_best = rpe->new_best;
+ rte *o = RTE_VALID_OR_NULL(first->old_best);
+ struct rte_storage *new_best = first->new_best;
- while (rpe)
+ RPE_WALK(first, rpe, NULL)
{
channel_rpe_mark_seen(req, rpe);
new_best = rpe->new_best;
- rpe = rpe_next(rpe, NULL);
}
rte n0 = RTE_COPY_VALID(new_best);
@@ -1097,27 +1101,26 @@ rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_
}
void
-rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe)
+rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first)
{
struct channel *c = SKIP_BACK(struct channel, out_req, req);
- rte *n = RTE_VALID_OR_NULL(rpe->new);
- rte *o = RTE_VALID_OR_NULL(rpe->old);
+ rte *n = RTE_VALID_OR_NULL(first->new);
+ rte *o = RTE_VALID_OR_NULL(first->old);
if (!n && !o)
{
- channel_rpe_mark_seen(req, rpe);
+ channel_rpe_mark_seen(req, first);
return;
}
struct rte_src *src = n ? n->src : o->src;
- struct rte_storage *new_latest = rpe->new;
+ struct rte_storage *new_latest = first->new;
- while (rpe)
+ RPE_WALK(first, rpe, src)
{
channel_rpe_mark_seen(req, rpe);
new_latest = rpe->new;
- rpe = rpe_next(rpe, src);
}
rte n0 = RTE_COPY_VALID(new_latest);
@@ -1165,9 +1168,11 @@ rpe_next(struct rt_pending_export *rpe, struct rte_src *src)
}
static struct rt_pending_export * rt_next_export_fast(struct rt_pending_export *last);
-static void
-rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe)
+static int
+rte_export(struct rt_table_export_hook *th, struct rt_pending_export *rpe)
{
+ rtable *tab = RT_PUB(SKIP_BACK(struct rtable_private, exporter, th->table));
+ struct rt_export_hook *hook = &th->h;
if (bmap_test(&hook->seq_map, rpe->seq))
goto ignore; /* Seen already */
@@ -1205,6 +1210,7 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe)
else if (hook->req->export_bulk)
{
net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n);
+ RT_LOCK(tab);
uint count = rte_feed_count(net);
rte **feed = NULL;
if (count)
@@ -1212,6 +1218,7 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe)
feed = alloca(count * sizeof(rte *));
rte_feed_obtain(net, feed, count);
}
+ RT_UNLOCK(tab);
hook->req->export_bulk(hook->req, n, rpe, feed, count);
}
else
@@ -1219,15 +1226,16 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe)
ignore:
/* Get the next export if exists */
- hook->rpe_next = rt_next_export_fast(rpe);
+ th->rpe_next = rt_next_export_fast(rpe);
/* The last block may be available to free */
- if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe))
- CALL(hook->table->used, hook->table);
+ int used = (PAGE_HEAD(th->rpe_next) != PAGE_HEAD(rpe));
/* Releasing this export for cleanup routine */
DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq);
- atomic_store_explicit(&hook->last_export, rpe, memory_order_release);
+ atomic_store_explicit(&th->last_export, rpe, memory_order_release);
+
+ return used;
}
/**
@@ -1262,7 +1270,7 @@ ignore:
* done outside of scope of rte_announce().
*/
static void
-rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old,
+rte_announce(struct rtable_private *tab, net *net, struct rte_storage *new, struct rte_storage *old,
struct rte_storage *new_best, struct rte_storage *old_best)
{
int new_best_valid = rte_is_valid(RTE_OR_NULL(new_best));
@@ -1271,23 +1279,12 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage
if ((new == old) && (new_best == old_best))
return;
- if (new_best_valid || old_best_valid)
- {
- if (new_best_valid)
- new_best->rte.sender->stats.pref++;
- if (old_best_valid)
- old_best->rte.sender->stats.pref--;
-
- if (tab->hostcache)
- rt_notify_hostcache(tab, net);
+ if (new_best_valid)
+ new_best->rte.sender->stats.pref++;
+ if (old_best_valid)
+ old_best->rte.sender->stats.pref--;
- if (!EMPTY_LIST(tab->flowspec_links))
- rt_flowspec_notify(tab, net);
- }
-
- rt_schedule_notify(tab);
-
- if (EMPTY_LIST(tab->exporter.hooks) && EMPTY_LIST(tab->exporter.pending))
+ if (EMPTY_LIST(tab->exporter.e.hooks) && EMPTY_LIST(tab->exporter.pending))
{
/* No export hook and no pending exports to cleanup. We may free the route immediately. */
if (!old)
@@ -1356,7 +1353,7 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage
&net->last->next, &rpenull, rpe,
memory_order_relaxed,
memory_order_relaxed));
-
+
}
net->last = rpe;
@@ -1368,9 +1365,6 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage
tab->exporter.first = rpe;
rt_check_cork_high(tab);
-
- if (!tm_active(tab->exporter.export_timer))
- tm_start(tab->exporter.export_timer, tab->config->export_settle_time);
}
static struct rt_pending_export *
@@ -1399,8 +1393,10 @@ rt_next_export_fast(struct rt_pending_export *last)
}
static struct rt_pending_export *
-rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab)
+rt_next_export(struct rt_table_export_hook *hook, struct rt_table_exporter *tab)
{
+ ASSERT_DIE(RT_IS_LOCKED(SKIP_BACK(struct rtable_private, exporter, tab)));
+
/* As the table is locked, it is safe to reload the last export pointer */
struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire);
@@ -1416,26 +1412,57 @@ rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab)
static inline void
rt_send_export_event(struct rt_export_hook *hook)
{
- ev_send(hook->req->list, hook->event);
+ ev_send(hook->req->list, &hook->event);
}
static void
rt_announce_exports(timer *tm)
{
- rtable *tab = tm->data;
+ RT_LOCKED((rtable *) tm->data, tab)
+ if (!EMPTY_LIST(tab->exporter.pending))
+ {
+ struct rt_export_hook *c; node *n;
+ WALK_LIST2(c, n, tab->exporter.e.hooks, n)
+ {
+ if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY)
+ continue;
- struct rt_export_hook *c; node *n;
- WALK_LIST2(c, n, tab->exporter.hooks, n)
+ rt_send_export_event(c);
+ }
+ }
+}
+
+static void
+rt_import_announce_exports(void *_hook)
+{
+ struct rt_import_hook *hook = _hook;
+ RT_LOCKED(hook->table, tab)
{
- if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY)
- continue;
+ if (hook->import_state == TIS_CLEARED)
+ {
+ void (*stopped)(struct rt_import_request *) = hook->stopped;
+ struct rt_import_request *req = hook->req;
+ req->hook = NULL;
+
+ rt_trace(tab, D_EVENTS, "Hook %s stopped", req->name);
+ rem_node(&hook->n);
+ mb_free(hook);
+ rt_unlock_table(tab);
+ RT_UNLOCK(tab);
- rt_send_export_event(c);
+ stopped(req);
+ return;
+ }
+
+ rt_trace(tab, D_EVENTS, "Announcing exports after imports from %s", hook->req->name);
+
+ if (!tm_active(tab->exporter.export_timer))
+ tm_start(tab->exporter.export_timer, tab->config->export_settle_time);
}
}
static struct rt_pending_export *
-rt_last_export(struct rt_exporter *tab)
+rt_last_export(struct rt_table_exporter *tab)
{
struct rt_pending_export *rpe = NULL;
@@ -1455,31 +1482,42 @@ rt_last_export(struct rt_exporter *tab)
static void
rt_export_hook(void *_data)
{
- struct rt_export_hook *c = _data;
+ struct rt_table_export_hook *c = _data;
+ rtable *tab = SKIP_BACK(rtable, priv.exporter, c->table);
- ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_READY);
+ ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_READY);
if (!c->rpe_next)
{
+ RT_LOCK(tab);
c->rpe_next = rt_next_export(c, c->table);
if (!c->rpe_next)
{
- CALL(c->table->used, c->table);
+ rt_export_used(c->table);
+ RT_UNLOCK(tab);
return;
}
+
+ RT_UNLOCK(tab);
}
+ int used = 0;
+
/* Process the export */
for (uint i=0; i<RT_EXPORT_BULK; i++)
{
- rte_export(c, c->rpe_next);
+ used += rte_export(c, c->rpe_next);
if (!c->rpe_next)
break;
}
- rt_send_export_event(c);
+ if (used)
+ RT_LOCKED(tab, _)
+ rt_export_used(c->table);
+
+ rt_send_export_event(&c->h);
}
@@ -1549,11 +1587,10 @@ rte_same(rte *x, rte *y)
static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
-static void
-rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src)
+static int
+rte_recalculate(struct rtable_private *table, struct rt_import_hook *c, net *net, rte *new, struct rte_src *src)
{
struct rt_import_request *req = c->req;
- struct rtable *table = c->table;
struct rt_import_stats *stats = &c->stats;
struct rte_storage *old_best_stored = net->routes, *old_stored = NULL;
rte *old_best = old_best_stored ? &old_best_stored->rte : NULL;
@@ -1602,7 +1639,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
/* We need to free the already stored route here before returning */
rte_free(new_stored);
- return;
+ return 0;
}
*before_old = (*before_old)->next;
@@ -1612,7 +1649,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
if (!old && !new)
{
stats->withdraws_ignored++;
- return;
+ return 0;
}
/* If rejected by import limit, we need to pretend there is no route */
@@ -1748,18 +1785,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
rte_announce(table, net, new_stored, old_stored,
net->routes, old_best_stored);
- if (!net->routes &&
- (table->gc_counter++ >= table->config->gc_threshold))
- rt_kick_prune_timer(table);
-
-#if 0
- /* Enable and reimplement these callbacks if anybody wants to use them */
- if (old_ok && p->rte_remove)
- p->rte_remove(net, old);
- if (new_ok && p->rte_insert)
- p->rte_insert(net, &new_stored->rte);
-#endif
-
+ return 1;
}
int
@@ -1870,39 +1896,47 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt
if (!hook)
return;
- net *nn;
- if (new)
+ RT_LOCKED(hook->table, tab)
+ {
+ net *nn;
+ if (new)
{
/* Use the actual struct network, not the dummy one */
- nn = net_get(hook->table, n);
+ nn = net_get(tab, n);
new->net = nn->n.addr;
new->sender = hook;
/* Set the stale cycle */
new->stale_cycle = hook->stale_set;
}
- else if (!(nn = net_find(hook->table, n)))
+ else if (!(nn = net_find(tab, n)))
{
req->hook->stats.withdraws_ignored++;
- return;
+ RT_RETURN(tab);
}
- /* And recalculate the best route */
- rte_recalculate(hook, nn, new, src);
+ /* Recalculate the best route */
+ if (rte_recalculate(tab, hook, nn, new, src))
+ ev_send(req->list, &hook->announce_event);
+ }
}
/* Check rtable for best route to given net whether it would be exported to p */
int
-rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter)
+rt_examine(rtable *tp, net_addr *a, struct channel *c, const struct filter *filter)
{
- net *n = net_find(t, a);
+ rte rt = {};
- if (!n || !rte_is_valid(RTE_OR_NULL(n->routes)))
- return 0;
+ RT_LOCKED(tp, t)
+ {
+ net *n = net_find(t, a);
+ if (n)
+ rt = RTE_COPY_VALID(n->routes);
+ }
- rte rt = n->routes->rte;
+ if (!rt.src)
+ return 0;
- /* Rest is stripped down export_filter() */
int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0;
if (v == RIC_PROCESS)
v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT);
@@ -1911,34 +1945,41 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte
}
static void
-rt_table_export_done(struct rt_export_hook *hook)
+rt_table_export_done(void *hh)
{
- struct rt_exporter *re = hook->table;
- struct rtable *tab = SKIP_BACK(struct rtable, exporter, re);
+ struct rt_table_export_hook *hook = hh;
+ struct rt_export_request *req = hook->h.req;
+ void (*stopped)(struct rt_export_request *) = hook->h.stopped;
+ rtable *t = SKIP_BACK(rtable, priv.exporter, hook->table);
- rt_unlock_table(tab);
- DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count);
+ RT_LOCKED(t, tab)
+ {
+ DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count);
+
+ /* Drop pending exports */
+ rt_export_used(&tab->exporter);
+
+ /* Do the common code; this frees the hook */
+ rt_export_stopped(&hook->h);
+ }
+
+ /* Report the channel as stopped. */
+ CALL(stopped, req);
+
+ /* Unlock the table; this may free it */
+ rt_unlock_table(t);
}
-static void
-rt_export_stopped(void *data)
+void
+rt_export_stopped(struct rt_export_hook *hook)
{
- struct rt_export_hook *hook = data;
- struct rt_exporter *tab = hook->table;
-
- /* Drop pending exports */
- CALL(tab->used, tab);
+ /* Unlink from the request */
+ hook->req->hook = NULL;
/* Unlist */
rem_node(&hook->n);
- /* Report the channel as stopped. */
- hook->stopped(hook->req);
-
- /* Reporting the hook as finished. */
- CALL(tab->done, hook);
-
- /* Free the hook. */
+ /* Free the hook itself together with its pool */
rfree(hook->pool);
}
@@ -1948,8 +1989,7 @@ rt_set_import_state(struct rt_import_hook *hook, u8 state)
hook->last_state_change = current_time();
hook->import_state = state;
- if (hook->req->log_state_change)
- hook->req->log_state_change(hook->req, state);
+ CALL(hook->req->log_state_change, hook->req, state);
}
void
@@ -1958,26 +1998,28 @@ rt_set_export_state(struct rt_export_hook *hook, u8 state)
hook->last_state_change = current_time();
atomic_store_explicit(&hook->export_state, state, memory_order_release);
- if (hook->req->log_state_change)
- hook->req->log_state_change(hook->req, state);
+ CALL(hook->req->log_state_change, hook->req, state);
}
void
-rt_request_import(rtable *tab, struct rt_import_request *req)
+rt_request_import(rtable *t, struct rt_import_request *req)
{
- rt_lock_table(tab);
+ RT_LOCKED(t, tab)
+ {
+ rt_lock_table(tab);
- struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook));
+ struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook));
- DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count);
+ hook->announce_event = (event) { .hook = rt_import_announce_exports, .data = hook };
- hook->req = req;
- hook->table = tab;
+ DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count);
- rt_set_import_state(hook, TIS_UP);
+ hook->req = req;
+ hook->table = t;
- hook->n = (node) {};
- add_tail(&tab->imports, &hook->n);
+ rt_set_import_state(hook, TIS_UP);
+ add_tail(&tab->imports, &hook->n);
+ }
}
void
@@ -1986,22 +2028,24 @@ rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_r
ASSERT_DIE(req->hook);
struct rt_import_hook *hook = req->hook;
- rt_schedule_prune(hook->table);
-
- rt_set_import_state(hook, TIS_STOP);
-
- hook->stopped = stopped;
+ RT_LOCKED(hook->table, tab)
+ {
+ rt_schedule_prune(tab);
+ rt_set_import_state(hook, TIS_STOP);
+ hook->stopped = stopped;
+ }
}
-static struct rt_export_hook *
-rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req)
+static void
+rt_table_export_start_locked(struct rtable_private *tab, struct rt_export_request *req)
{
- rtable *tab = SKIP_BACK(rtable, exporter, re);
+ struct rt_exporter *re = &tab->exporter.e;
rt_lock_table(tab);
- pool *p = rp_new(tab->rp, "Export hook");
- struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook));
- hook->pool = p;
+ req->hook = rt_alloc_export(re, sizeof(struct rt_table_export_hook));
+ req->hook->req = req;
+
+ struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, req->hook);
/* stats zeroed by mb_allocz */
switch (req->addr_mode)
@@ -2009,24 +2053,25 @@ rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req)
case TE_ADDR_IN:
if (tab->trie && net_val_match(tab->addr_type, NB_IP))
{
- hook->walk_state = mb_allocz(p, sizeof (struct f_trie_walk_state));
+ hook->walk_state = mb_allocz(hook->h.pool, sizeof (struct f_trie_walk_state));
hook->walk_lock = rt_lock_trie(tab);
trie_walk_init(hook->walk_state, tab->trie, req->addr);
- hook->event = ev_new_init(p, rt_feed_by_trie, hook);
+ hook->h.event.hook = rt_feed_by_trie;
+ hook->walk_last.type = 0;
break;
}
/* fall through */
case TE_ADDR_NONE:
FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib);
- hook->event = ev_new_init(p, rt_feed_by_fib, hook);
+ hook->h.event.hook = rt_feed_by_fib;
break;
case TE_ADDR_EQUAL:
- hook->event = ev_new_init(p, rt_feed_equal, hook);
+ hook->h.event.hook = rt_feed_equal;
break;
case TE_ADDR_FOR:
- hook->event = ev_new_init(p, rt_feed_for, hook);
+ hook->h.event.hook = rt_feed_for;
break;
default:
@@ -2035,22 +2080,50 @@ rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req)
DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count);
- return hook;
+ struct rt_pending_export *rpe = rt_last_export(hook->table);
+ DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0);
+ atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed);
+
+ rt_init_export(re, req->hook);
+}
+
+static void
+rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req)
+{
+ RT_LOCKED(SKIP_BACK(rtable, priv.exporter.e, re), tab)
+ rt_table_export_start_locked(tab, req);
+}
+
+void rt_request_export(rtable *t, struct rt_export_request *req)
+{
+ RT_LOCKED(t, tab)
+ rt_table_export_start_locked(tab, req); /* Is locked inside */
}
void
-rt_request_export(struct rt_exporter *re, struct rt_export_request *req)
+rt_request_export_other(struct rt_exporter *re, struct rt_export_request *req)
+{
+ return re->class->start(re, req);
+}
+
+struct rt_export_hook *
+rt_alloc_export(struct rt_exporter *re, uint size)
{
- struct rt_export_hook *hook = req->hook = re->start(re, req);
+ pool *p = rp_new(re->rp, "Export hook");
+ struct rt_export_hook *hook = mb_allocz(p, size);
- hook->req = req;
+ hook->pool = p;
hook->table = re;
- bmap_init(&hook->seq_map, hook->pool, 1024);
+ return hook;
+}
- struct rt_pending_export *rpe = rt_last_export(hook->table);
- DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0);
- atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed);
+void
+rt_init_export(struct rt_exporter *re, struct rt_export_hook *hook)
+{
+ hook->event.data = hook;
+
+ bmap_init(&hook->seq_map, hook->pool, 1024);
hook->n = (node) {};
add_tail(&re->hooks, &hook->n);
@@ -2061,45 +2134,57 @@ rt_request_export(struct rt_exporter *re, struct rt_export_request *req)
}
static void
-rt_table_export_stop(struct rt_export_hook *hook)
+rt_table_export_stop_locked(struct rt_export_hook *hh)
{
- rtable *tab = SKIP_BACK(rtable, exporter, hook->table);
+ struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, hh);
+ struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter, hook->table);
- if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) != TES_FEEDING)
- return;
-
- switch (hook->req->addr_mode)
- {
- case TE_ADDR_IN:
- if (hook->walk_lock)
- {
- rt_unlock_trie(tab, hook->walk_lock);
- hook->walk_lock = NULL;
- mb_free(hook->walk_state);
- hook->walk_state = NULL;
+ if (atomic_load_explicit(&hh->export_state, memory_order_relaxed) == TES_FEEDING)
+ switch (hh->req->addr_mode)
+ {
+ case TE_ADDR_IN:
+ if (hook->walk_lock)
+ {
+ rt_unlock_trie(tab, hook->walk_lock);
+ hook->walk_lock = NULL;
+ mb_free(hook->walk_state);
+ hook->walk_state = NULL;
+ break;
+ }
+ /* fall through */
+ case TE_ADDR_NONE:
+ fit_get(&tab->fib, &hook->feed_fit);
break;
- }
- /* fall through */
- case TE_ADDR_NONE:
- fit_get(&tab->fib, &hook->feed_fit);
- break;
- }
+ }
+}
+
+static void
+rt_table_export_stop(struct rt_export_hook *hh)
+{
+ struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, hh);
+ rtable *t = SKIP_BACK(rtable, priv.exporter, hook->table);
+ if (RT_IS_LOCKED(t))
+ rt_table_export_stop_locked(hh);
+ else
+ RT_LOCKED(t, tab)
+ rt_table_export_stop_locked(hh);
}
void
rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *))
{
+ ASSERT_DIE(birdloop_inside(req->list->loop));
ASSERT_DIE(req->hook);
struct rt_export_hook *hook = req->hook;
/* Cancel the feeder event */
- ev_postpone(hook->event);
+ ev_postpone(&hook->event);
/* Stop feeding from the exporter */
- CALL(hook->table->stop, hook);
+ CALL(hook->table->class->stop, hook);
/* Reset the event as the stopped event */
- hook->event->hook = rt_export_stopped;
+ hook->event.hook = hook->table->class->done;
hook->stopped = stopped;
/* Update export state */
@@ -2130,12 +2215,15 @@ rt_refresh_begin(struct rt_import_request *req)
ASSERT_DIE(hook);
ASSERT_DIE(hook->stale_set == hook->stale_valid);
+ RT_LOCKED(hook->table, tab)
+ {
+
/* If the pruning routine is too slow */
if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid)
|| (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128))
{
log(L_WARN "Route refresh flood in table %s", hook->table->name);
- FIB_WALK(&hook->table->fib, net, n)
+ FIB_WALK(&tab->fib, net, n)
{
for (struct rte_storage *e = n->routes; e; e = e->next)
if (e->rte.sender == req->hook)
@@ -2156,6 +2244,8 @@ rt_refresh_begin(struct rt_import_request *req)
if (req->trace_routes & D_STATES)
log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set);
+
+ }
}
/**
@@ -2172,13 +2262,16 @@ rt_refresh_end(struct rt_import_request *req)
struct rt_import_hook *hook = req->hook;
ASSERT_DIE(hook);
- hook->stale_valid++;
- ASSERT_DIE(hook->stale_set == hook->stale_valid);
+ RT_LOCKED(hook->table, tab)
+ {
+ hook->stale_valid++;
+ ASSERT_DIE(hook->stale_set == hook->stale_valid);
- rt_schedule_prune(hook->table);
+ rt_schedule_prune(tab);
- if (req->trace_routes & D_STATES)
- log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid);
+ if (req->trace_routes & D_STATES)
+ log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid);
+ }
}
/**
@@ -2203,8 +2296,11 @@ rte_dump(struct rte_storage *e)
* This function dumps contents of a given routing table to debug output.
*/
void
-rt_dump(rtable *t)
+rt_dump(rtable *tp)
{
+ RT_LOCKED(tp, t)
+ {
+
debug("Dump of routing table <%s>%s\n", t->name, t->deleted ? " (deleted)" : "");
#ifdef DEBUGGING
fib_check(&t->fib);
@@ -2216,6 +2312,8 @@ rt_dump(rtable *t)
}
FIB_WALK_END;
debug("\n");
+
+ }
}
/**
@@ -2237,11 +2335,14 @@ rt_dump_all(void)
}
void
-rt_dump_hooks(rtable *tab)
+rt_dump_hooks(rtable *tp)
{
+ RT_LOCKED(tp, tab)
+ {
+
debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->deleted ? " (deleted)" : "");
- debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n",
- tab->nhu_state, tab->hcu_scheduled, tab->use_count, tab->rt_count);
+ debug(" nhu_state=%u use_count=%d rt_count=%u\n",
+ tab->nhu_state, tab->use_count, tab->rt_count);
debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n",
tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state);
@@ -2255,15 +2356,18 @@ rt_dump_hooks(rtable *tab)
ih->last_state_change, ih->import_state, ih->stopped);
}
- struct rt_export_hook *eh;
- WALK_LIST(eh, tab->exporter.hooks)
+ struct rt_table_export_hook *eh;
+ WALK_LIST(eh, tab->exporter.e.hooks)
{
- eh->req->dump_req(eh->req);
+ eh->h.req->dump_req(eh->h.req);
debug(" Export hook %p requested by %p:"
" refeed_pending=%u last_state_change=%t export_state=%u\n",
- eh, eh->req, eh->refeed_pending, eh->last_state_change, atomic_load_explicit(&eh->export_state, memory_order_relaxed));
+ eh, eh->h.req, eh->refeed_pending, eh->h.last_state_change,
+ atomic_load_explicit(&eh->h.export_state, memory_order_relaxed));
}
debug("\n");
+
+ }
}
void
@@ -2282,30 +2386,32 @@ rt_dump_hooks_all(void)
}
static inline void
-rt_schedule_hcu(rtable *tab)
+rt_schedule_nhu(struct rtable_private *tab)
{
- if (tab->hcu_scheduled)
- return;
-
- tab->hcu_scheduled = 1;
- ev_schedule(tab->rt_event);
-}
-
-static inline void
-rt_schedule_nhu(rtable *tab)
-{
- if (tab->nhu_state == NHU_CLEAN)
- ev_schedule(tab->rt_event);
-
- /* state change:
- * NHU_CLEAN -> NHU_SCHEDULED
- * NHU_RUNNING -> NHU_DIRTY
- */
- tab->nhu_state |= NHU_SCHEDULED;
+ if (tab->nhu_corked)
+ {
+ if (!(tab->nhu_corked & NHU_SCHEDULED))
+ {
+ tab->nhu_corked |= NHU_SCHEDULED;
+ rt_lock_table(tab);
+ }
+ }
+ else if (!(tab->nhu_state & NHU_SCHEDULED))
+ {
+ rt_trace(tab, D_EVENTS, "Scheduling NHU");
+ rt_lock_table(tab);
+
+ /* state change:
+ * NHU_CLEAN -> NHU_SCHEDULED
+ * NHU_RUNNING -> NHU_DIRTY
+ */
+ if ((tab->nhu_state |= NHU_SCHEDULED) == NHU_SCHEDULED)
+ ev_schedule(tab->nhu_event);
+ }
}
void
-rt_schedule_prune(rtable *tab)
+rt_schedule_prune(struct rtable_private *tab)
{
if (tab->prune_state == 0)
ev_schedule(tab->rt_event);
@@ -2315,12 +2421,12 @@ rt_schedule_prune(rtable *tab)
}
static void
-rt_export_used(struct rt_exporter *e)
+rt_export_used(struct rt_table_exporter *e)
{
- rtable *tab = SKIP_BACK(rtable, exporter, e);
+ struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter, e);
+ ASSERT_DIE(RT_IS_LOCKED(tab));
- if (config->table_debug)
- log(L_TRACE "%s: Export cleanup requested", tab->name);
+ rt_trace(tab, D_EVENTS, "Export cleanup requested");
if (tab->export_used)
return;
@@ -2332,69 +2438,31 @@ rt_export_used(struct rt_exporter *e)
static void
rt_event(void *ptr)
{
- rtable *tab = ptr;
+ RT_LOCKED((rtable *) ptr, tab)
+ {
rt_lock_table(tab);
if (tab->export_used)
rt_export_cleanup(tab);
- if (
- tab->hcu_corked ||
- tab->nhu_corked ||
- (tab->hcu_scheduled || tab->nhu_state) && rt_cork_check(tab->uncork_event)
- )
- {
- if (!tab->hcu_corked && !tab->nhu_corked && config->table_debug)
- log(L_TRACE "%s: Auxiliary routines corked", tab->name);
-
- tab->hcu_corked |= tab->hcu_scheduled;
- tab->hcu_scheduled = 0;
-
- tab->nhu_corked |= tab->nhu_state;
- tab->nhu_state = 0;
- }
-
- if (tab->hcu_scheduled)
- rt_update_hostcache(tab);
-
- if (tab->nhu_state)
- rt_next_hop_update(tab);
-
if (tab->prune_state)
rt_prune_table(tab);
rt_unlock_table(tab);
-}
-
-static void
-rt_uncork_event(void *ptr)
-{
- rtable *tab = ptr;
-
- tab->hcu_scheduled |= tab->hcu_corked;
- tab->hcu_corked = 0;
-
- tab->nhu_state |= tab->nhu_corked;
- tab->nhu_corked = 0;
-
- if (config->table_debug)
- log(L_TRACE "%s: Auxiliary routines uncorked", tab->name);
-
- ev_schedule(tab->rt_event);
+ }
}
static void
rt_prune_timer(timer *t)
{
- rtable *tab = t->data;
-
- if (tab->gc_counter >= tab->config->gc_threshold)
- rt_schedule_prune(tab);
+ RT_LOCKED((rtable *) t->data, tab)
+ if (tab->gc_counter >= tab->config->gc_threshold)
+ rt_schedule_prune(tab);
}
static void
-rt_kick_prune_timer(rtable *tab)
+rt_kick_prune_timer(struct rtable_private *tab)
{
/* Return if prune is already scheduled */
if (tm_active(tab->prune_timer) || (tab->prune_state & 1))
@@ -2407,153 +2475,132 @@ rt_kick_prune_timer(rtable *tab)
}
-static inline btime
-rt_settled_time(rtable *tab)
-{
- ASSUME(tab->base_settle_time != 0);
-
- return MIN(tab->last_rt_change + tab->config->min_settle_time,
- tab->base_settle_time + tab->config->max_settle_time);
-}
-
static void
-rt_settle_timer(timer *t)
+rt_flowspec_export_one(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first)
{
- rtable *tab = t->data;
-
- if (!tab->base_settle_time)
- return;
+ struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req);
+ rtable *dst_pub = ln->dst;
+ ASSUME(rt_is_flow(dst_pub));
+ struct rtable_private *dst = RT_LOCK(dst_pub);
- btime settled_time = rt_settled_time(tab);
- if (current_time() < settled_time)
+ /* No need to inspect it further if recalculation is already scheduled */
+ if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY)
+ || !trie_match_net(dst->flowspec_trie, net))
{
- tm_set(tab->settle_timer, settled_time);
+ RT_UNLOCK(dst_pub);
+ rpe_mark_seen_all(req->hook, first, NULL);
return;
}
- /* Settled */
- tab->base_settle_time = 0;
-
- struct rt_subscription *s;
- WALK_LIST(s, tab->subscribers)
- ev_send(s->list, s->event);
-}
-
-static void
-rt_kick_settle_timer(rtable *tab)
-{
- tab->base_settle_time = current_time();
-
- if (!tab->settle_timer)
- tab->settle_timer = tm_new_init(tab->rp, rt_settle_timer, tab, 0, 0);
-
- if (!tm_active(tab->settle_timer))
- tm_set(tab->settle_timer, rt_settled_time(tab));
-}
-
-static inline void
-rt_schedule_notify(rtable *tab)
-{
- if (EMPTY_LIST(tab->subscribers))
- return;
+ /* This net may affect some flowspecs, check the actual change */
+ rte *o = RTE_VALID_OR_NULL(first->old_best);
+ struct rte_storage *new_best = first->new_best;
- if (tab->base_settle_time)
- return;
+ RPE_WALK(first, rpe, NULL)
+ {
+ rpe_mark_seen(req->hook, rpe);
+ new_best = rpe->new_best;
+ }
- rt_kick_settle_timer(tab);
-}
+ /* Yes, something has actually changed. Schedule the update. */
+ if (o != RTE_VALID_OR_NULL(new_best))
+ rt_schedule_nhu(dst);
-void
-rt_subscribe(rtable *tab, struct rt_subscription *s)
-{
- s->tab = tab;
- rt_lock_table(tab);
- DBG("rt_subscribe(%s)\n", tab->name);
- add_tail(&tab->subscribers, &s->n);
+ RT_UNLOCK(dst_pub);
}
-void
-rt_unsubscribe(struct rt_subscription *s)
+static void
+rt_flowspec_dump_req(struct rt_export_request *req)
{
- rem_node(&s->n);
- rt_unlock_table(s->tab);
+ struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req);
+ debug(" Flowspec link for table %s (%p)\n", ln->dst->name, req);
}
static struct rt_flowspec_link *
-rt_flowspec_find_link(rtable *src, rtable *dst)
+rt_flowspec_find_link(struct rtable_private *src, rtable *dst)
{
- struct rt_flowspec_link *ln;
- WALK_LIST(ln, src->flowspec_links)
- if ((ln->src == src) && (ln->dst == dst))
- return ln;
+ struct rt_table_export_hook *hook; node *n;
+ WALK_LIST2(hook, n, src->exporter.e.hooks, h.n)
+ switch (atomic_load_explicit(&hook->h.export_state, memory_order_acquire))
+ {
+ case TES_FEEDING:
+ case TES_READY:
+ if (hook->h.req->export_one == rt_flowspec_export_one)
+ {
+ struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, hook->h.req);
+ if (ln->dst == dst)
+ return ln;
+ }
+ }
return NULL;
}
void
-rt_flowspec_link(rtable *src, rtable *dst)
+rt_flowspec_link(rtable *src_pub, rtable *dst_pub)
{
- ASSERT(rt_is_ip(src));
- ASSERT(rt_is_flow(dst));
+ ASSERT(rt_is_ip(src_pub));
+ ASSERT(rt_is_flow(dst_pub));
- struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst);
+ int lock_dst = 0;
- if (!ln)
+ RT_LOCKED(src_pub, src)
{
- rt_lock_table(src);
- rt_lock_table(dst);
+ struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst_pub);
- ln = mb_allocz(src->rp, sizeof(struct rt_flowspec_link));
- ln->src = src;
- ln->dst = dst;
- add_tail(&src->flowspec_links, &ln->n);
+ if (!ln)
+ {
+ pool *p = src->rp;
+ ln = mb_allocz(p, sizeof(struct rt_flowspec_link));
+ ln->src = src_pub;
+ ln->dst = dst_pub;
+ ln->req = (struct rt_export_request) {
+ .name = mb_sprintf(p, "%s.flowspec.notifier", dst_pub->name),
+ .list = &global_work_list,
+ .trace_routes = src->config->debug,
+ .dump_req = rt_flowspec_dump_req,
+ .export_one = rt_flowspec_export_one,
+ };
+
+ rt_table_export_start_locked(src, &ln->req);
+
+ lock_dst = 1;
+ }
+
+ ln->uc++;
}
- ln->uc++;
+ if (lock_dst)
+ rt_lock_table(dst_pub);
}
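
The flowspec link is now an ordinary export request registered on the source table's exporter, refcounted by ln->uc. A hypothetical caller (the table handles below are made up; in-tree it is the BGP channel code that drives this) only touches the two public entry points:

    /* Sketch only: assumes two already-configured tables of matching types. */
    extern rtable *master4;      /* a NET_IP4 table (hypothetical handle) */
    extern rtable *flowspec4;    /* a NET_FLOW4 table (hypothetical handle) */

    static void my_flowspec_start(void)
    {
      /* First call allocates the link and starts its export request;
       * further calls just bump ln->uc. */
      rt_flowspec_link(master4, flowspec4);
    }

    static void my_flowspec_stop(void)
    {
      /* Last drop stops the export request; the dst table unlock happens
       * asynchronously in rt_flowspec_link_stopped(). */
      rt_flowspec_unlink(master4, flowspec4);
    }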
-void
-rt_flowspec_unlink(rtable *src, rtable *dst)
+static void
+rt_flowspec_link_stopped(struct rt_export_request *req)
{
- struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst);
-
- ASSERT(ln && (ln->uc > 0));
-
- ln->uc--;
+ struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req);
+ rtable *dst = ln->dst;
- if (!ln->uc)
- {
- rem_node(&ln->n);
- mb_free(ln);
-
- rt_unlock_table(src);
- rt_unlock_table(dst);
- }
+ mb_free(ln);
+ rt_unlock_table(dst);
}
-static void
-rt_flowspec_notify(rtable *src, net *net)
+void
+rt_flowspec_unlink(rtable *src, rtable *dst)
{
- /* Only IP tables are src links */
- ASSERT(rt_is_ip(src));
-
struct rt_flowspec_link *ln;
- WALK_LIST(ln, src->flowspec_links)
+ RT_LOCKED(src, t)
{
- rtable *dst = ln->dst;
- ASSERT(rt_is_flow(dst));
+ ln = rt_flowspec_find_link(t, dst);
- /* No need to inspect it further if recalculation is already active */
- if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY))
- continue;
+ ASSERT(ln && (ln->uc > 0));
- if (trie_match_net(dst->flowspec_trie, net->n.addr))
- rt_schedule_nhu(dst);
+ if (!--ln->uc)
+ rt_stop_export(&ln->req, rt_flowspec_link_stopped);
}
}
static void
-rt_flowspec_reset_trie(rtable *tab)
+rt_flowspec_reset_trie(struct rtable_private *tab)
{
linpool *lp = tab->flowspec_trie->lp;
int ipv4 = tab->flowspec_trie->ipv4;
@@ -2566,7 +2613,9 @@ rt_flowspec_reset_trie(rtable *tab)
static void
rt_free(resource *_r)
{
- rtable *r = (rtable *) _r;
+ struct rtable_private *r = SKIP_BACK(struct rtable_private, r, _r);
+
+ DOMAIN_FREE(rtable, r->lock);
DBG("Deleting routing table %s\n", r->name);
ASSERT_DIE(r->use_count == 0);
@@ -2581,7 +2630,6 @@ rt_free(resource *_r)
fib_free(&r->fib);
hmap_free(&r->id_map);
rfree(r->rt_event);
- rfree(r->settle_timer);
mb_free(r);
*/
}
@@ -2589,26 +2637,42 @@ rt_free(resource *_r)
static void
rt_res_dump(resource *_r)
{
- rtable *r = (rtable *) _r;
+ struct rtable_private *r = SKIP_BACK(struct rtable_private, r, _r);
+
debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n",
r->name, net_label[r->addr_type], r->rt_count, r->use_count);
}
static struct resclass rt_class = {
.name = "Routing table",
- .size = sizeof(struct rtable),
+ .size = sizeof(rtable),
.free = rt_free,
.dump = rt_res_dump,
.lookup = NULL,
.memsize = NULL,
};
+static const struct rt_exporter_class rt_table_exporter_class = {
+ .start = rt_table_export_start,
+ .stop = rt_table_export_stop,
+ .done = rt_table_export_done,
+};
+
+void
+rt_exporter_init(struct rt_exporter *e)
+{
+ init_list(&e->hooks);
+}
+
+static struct idm rtable_idm;
+uint rtable_max_id = 0;
+
rtable *
rt_setup(pool *pp, struct rtable_config *cf)
{
pool *p = rp_newf(pp, "Routing table %s", cf->name);
- rtable *t = ralloc(p, &rt_class);
+ struct rtable_private *t = ralloc(p, &rt_class);
t->rp = p;
t->rte_slab = sl_new(p, sizeof(struct rte_storage));
@@ -2616,6 +2680,11 @@ rt_setup(pool *pp, struct rtable_config *cf)
t->name = cf->name;
t->config = cf;
t->addr_type = cf->addr_type;
+ t->id = idm_alloc(&rtable_idm);
+ if (t->id >= rtable_max_id)
+ rtable_max_id = t->id + 1;
+
+ t->lock = DOMAIN_NEW(rtable, t->name);
fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
@@ -2627,44 +2696,41 @@ rt_setup(pool *pp, struct rtable_config *cf)
t->fib.init = net_init_with_trie;
}
- init_list(&t->flowspec_links);
-
- t->exporter = (struct rt_exporter) {
- .addr_type = t->addr_type,
- .start = rt_table_export_start,
- .stop = rt_table_export_stop,
- .done = rt_table_export_done,
- .used = rt_export_used,
- };
-
- init_list(&t->exporter.hooks);
- init_list(&t->exporter.pending);
-
init_list(&t->imports);
hmap_init(&t->id_map, p, 1024);
hmap_set(&t->id_map, 0);
- init_list(&t->subscribers);
-
t->rt_event = ev_new_init(p, rt_event, t);
- t->uncork_event = ev_new_init(p, rt_uncork_event, t);
+ t->nhu_event = ev_new_init(p, rt_next_hop_update, t);
t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0);
- t->exporter.export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0);
t->last_rt_change = t->gc_time = current_time();
- t->exporter.next_seq = 1;
+
+ t->exporter = (struct rt_table_exporter) {
+ .e = {
+ .class = &rt_table_exporter_class,
+ .addr_type = t->addr_type,
+ .rp = t->rp,
+ },
+ .export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0),
+ .next_seq = 1,
+ };
+
+ rt_exporter_init(&t->exporter.e);
+
+ init_list(&t->exporter.pending);
t->cork_threshold = cf->cork_threshold;
t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS;
- if (rt_is_flow(t))
+ if (rt_is_flow(RT_PUB(t)))
{
t->flowspec_trie = f_new_trie(lp_new_default(p), 0);
t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4);
}
- return t;
+ return RT_PUB(t);
}
/**
@@ -2682,6 +2748,7 @@ rt_init(void)
init_list(&deleted_routing_tables);
ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release");
rt_cork.run = (event) { .hook = rt_cork_release_hook };
+ idm_init(&rtable_idm, rt_table_pool, 256);
}
@@ -2700,7 +2767,7 @@ rt_init(void)
* iteration.
*/
static void
-rt_prune_table(rtable *tab)
+rt_prune_table(struct rtable_private *tab)
{
struct fib_iterator *fit = &tab->prune_fit;
int limit = 2000;
@@ -2708,7 +2775,7 @@ rt_prune_table(rtable *tab)
struct rt_import_hook *ih;
node *n, *x;
- DBG("Pruning route table %s\n", tab->name);
+ rt_trace(tab, D_STATES, "Pruning");
#ifdef DEBUGGING
fib_check(&tab->fib);
#endif
@@ -2762,7 +2829,7 @@ again:
(e->rte.stale_cycle < s->stale_valid) ||
(e->rte.stale_cycle > s->stale_set))
{
- rte_recalculate(e->rte.sender, n, NULL, e->rte.src);
+ rte_recalculate(tab, e->rte.sender, n, NULL, e->rte.src);
limit--;
goto rescan;
@@ -2784,6 +2851,10 @@ again:
}
FIB_ITERATE_END;
+ rt_trace(tab, D_EVENTS, "Prune done, scheduling export timer");
+ if (!tm_active(tab->exporter.export_timer))
+ tm_start(tab->exporter.export_timer, tab->config->export_settle_time);
+
#ifdef DEBUGGING
fib_check(&tab->fib);
#endif
@@ -2840,24 +2911,25 @@ again:
}
/* In some cases, we may want to directly proceed to export cleanup */
- if (EMPTY_LIST(tab->exporter.hooks) && flushed_channels)
+ if (EMPTY_LIST(tab->exporter.e.hooks) && flushed_channels)
rt_export_cleanup(tab);
}
static void
-rt_export_cleanup(rtable *tab)
+rt_export_cleanup(struct rtable_private *tab)
{
tab->export_used = 0;
u64 min_seq = ~((u64) 0);
struct rt_pending_export *last_export_to_free = NULL;
struct rt_pending_export *first = tab->exporter.first;
+ int want_prune = 0;
- struct rt_export_hook *eh;
+ struct rt_table_export_hook *eh;
node *n;
- WALK_LIST2(eh, n, tab->exporter.hooks, n)
+ WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n)
{
- switch (atomic_load_explicit(&eh->export_state, memory_order_acquire))
+ switch (atomic_load_explicit(&eh->h.export_state, memory_order_acquire))
{
case TES_DOWN:
continue;
@@ -2885,16 +2957,14 @@ rt_export_cleanup(rtable *tab)
tab->exporter.first = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL;
- if (config->table_debug)
- log(L_TRACE "%s: Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld",
- tab->name,
+ rt_trace(tab, D_STATES, "Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld",
first ? first->seq : 0,
tab->exporter.first ? tab->exporter.first->seq : 0,
min_seq);
- WALK_LIST2(eh, n, tab->exporter.hooks, n)
+ WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n)
{
- if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY)
+ if (atomic_load_explicit(&eh->h.export_state, memory_order_acquire) != TES_READY)
continue;
struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire);
@@ -2920,7 +2990,7 @@ rt_export_cleanup(rtable *tab)
net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n);
ASSERT_DIE(net->first == first);
-
+
if (first == net->last)
/* The only export here */
net->last = net->first = NULL;
@@ -2928,6 +2998,8 @@ rt_export_cleanup(rtable *tab)
/* First is now the next one */
net->first = atomic_load_explicit(&first->next, memory_order_relaxed);
+ want_prune += !net->routes && !net->first;
+
/* For now, the old route may be finally freed */
if (first->old)
{
@@ -2948,7 +3020,7 @@ rt_export_cleanup(rtable *tab)
ASSERT_DIE(pos < end);
struct rt_pending_export *next = NULL;
-
+
if (++pos < end)
next = &reb->export[pos];
else
@@ -2963,17 +3035,16 @@ rt_export_cleanup(rtable *tab)
if (EMPTY_LIST(tab->exporter.pending))
{
- if (config->table_debug)
- log(L_TRACE "%s: Resetting export seq", tab->name);
+ rt_trace(tab, D_EVENTS, "Resetting export seq");
node *n;
- WALK_LIST2(eh, n, tab->exporter.hooks, n)
+ WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n)
{
- if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY)
+ if (atomic_load_explicit(&eh->h.export_state, memory_order_acquire) != TES_READY)
continue;
ASSERT_DIE(atomic_load_explicit(&eh->last_export, memory_order_acquire) == NULL);
- bmap_reset(&eh->seq_map, 1024);
+ bmap_reset(&eh->h.seq_map, 1024);
}
tab->exporter.next_seq = 1;
@@ -2997,12 +3068,12 @@ done:;
if (!first || (first->seq >= ih->flush_seq))
{
ih->import_state = TIS_CLEARED;
- ih->stopped(ih->req);
- rem_node(&ih->n);
- mb_free(ih);
- rt_unlock_table(tab);
+ ev_send(ih->req->list, &ih->announce_event);
}
+ if ((tab->gc_counter += want_prune) >= tab->config->gc_threshold)
+ rt_kick_prune_timer(tab);
+
if (tab->export_used)
ev_schedule(tab->rt_event);
@@ -3035,7 +3106,7 @@ rt_cork_release_hook(void *data UNUSED)
*
*/
struct f_trie *
-rt_lock_trie(rtable *tab)
+rt_lock_trie(struct rtable_private *tab)
{
ASSERT(tab->trie);
@@ -3052,7 +3123,7 @@ rt_lock_trie(rtable *tab)
* It may free the trie and schedule next trie pruning.
*/
void
-rt_unlock_trie(rtable *tab, struct f_trie *trie)
+rt_unlock_trie(struct rtable_private *tab, struct f_trie *trie)
{
ASSERT(trie);
@@ -3092,8 +3163,8 @@ rt_preconfig(struct config *c)
{
init_list(&c->tables);
- rt_new_table(cf_get_symbol("master4"), NET_IP4);
- rt_new_table(cf_get_symbol("master6"), NET_IP6);
+ c->def_tables[NET_IP4] = cf_define_symbol(cf_get_symbol("master4"), SYM_TABLE, table, NULL);
+ c->def_tables[NET_IP6] = cf_define_symbol(cf_get_symbol("master6"), SYM_TABLE, table, NULL);
}
void
@@ -3108,6 +3179,13 @@ rt_postconfig(struct config *c)
WALK_LIST(rc, c->tables)
if (rc->gc_period == (uint) -1)
rc->gc_period = (uint) def_gc_period;
+
+ for (uint net_type = 0; net_type < NET_MAX; net_type++)
+ if (c->def_tables[net_type] && !c->def_tables[net_type]->table)
+ {
+ c->def_tables[net_type]->class = SYM_VOID;
+ c->def_tables[net_type] = NULL;
+ }
}
@@ -3117,7 +3195,7 @@ rt_postconfig(struct config *c)
*/
void
-ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum])
+ea_set_hostentry(ea_list **to, rtable *dep, rtable *src, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum])
{
struct {
struct adata ad;
@@ -3125,7 +3203,8 @@ ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr g
u32 labels[lnum];
} *head = (void *) tmp_alloc_adata(sizeof *head - sizeof(struct adata));
- head->he = rt_get_hostentry(tab, gw, ll, dep);
+ RT_LOCKED(src, tab)
+ head->he = rt_get_hostentry(tab, gw, ll, dep);
memcpy(head->labels, labels, lnum * sizeof(u32));
ea_set_attr(to, EA_LITERAL_DIRECT_ADATA(
@@ -3254,17 +3333,16 @@ rta_next_hop_outdated(ea_list *a)
? head : NULL;
}
-static inline struct rte_storage *
-rt_next_hop_update_rte(rtable *tab, net *n, rte *old)
+static inline int
+rt_next_hop_update_rte(rte *old, rte *new)
{
struct hostentry_adata *head = rta_next_hop_outdated(old->attrs);
if (!head)
- return NULL;
-
- rte e0 = *old;
- rta_apply_hostentry(&e0.attrs, head);
+ return 0;
- return rte_store(&e0, n, tab);
+ *new = *old;
+ rta_apply_hostentry(&new->attrs, head);
+ return 1;
}
static inline void
@@ -3322,7 +3400,6 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list *
{
ASSERT(rt_is_ip(tab_ip));
ASSERT(rt_is_flow(tab_flow));
- ASSERT(tab_ip->trie);
/* RFC 8955 6. a) Flowspec has defined dst prefix */
if (!net_flow_has_dst_prefix(n))
@@ -3342,32 +3419,45 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list *
else
net_fill_ip6(&dst, net6_prefix(n), net6_pxlen(n));
- /* Find best-match BGP unicast route for flowspec dst prefix */
- net *nb = net_route(tab_ip, &dst);
- const rte *rb = nb ? &nb->routes->rte : NULL;
+ rte rb = {};
+ net_addr_union nau;
+ RT_LOCKED(tab_ip, tip)
+ {
+ ASSERT(tip->trie);
+ /* Find best-match BGP unicast route for flowspec dst prefix */
+ net *nb = net_route(tip, &dst);
+ if (nb)
+ {
+ rb = RTE_COPY_VALID(nb->routes);
+ rta_clone(rb.attrs);
+ net_copy(&nau.n, nb->n.addr);
+ rb.net = &nau.n;
+ }
+ }
/* Register prefix to trie for tracking further changes */
int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH;
- trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? nb->n.addr->pxlen : 0), max_pxlen);
+ RT_LOCKED(tab_flow, tfl)
+ trie_add_prefix(tfl->flowspec_trie, &dst, (rb.net ? rb.net->pxlen : 0), max_pxlen);
/* No best-match BGP route -> no flowspec */
- if (!rb || (rt_get_source_attr(rb) != RTS_BGP))
+ if (!rb.attrs || (rt_get_source_attr(&rb) != RTS_BGP))
return FLOWSPEC_INVALID;
/* Find ORIGINATOR_ID values */
u32 orig_a = ea_get_int(a, "bgp_originator_id", 0);
- u32 orig_b = ea_get_int(rb->attrs, "bgp_originator_id", 0);
+ u32 orig_b = ea_get_int(rb.attrs, "bgp_originator_id", 0);
/* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */
if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal(
ea_get_ip(a, &ea_gen_from, IPA_NONE),
- ea_get_ip(rb->attrs, &ea_gen_from, IPA_NONE)
+ ea_get_ip(rb.attrs, &ea_gen_from, IPA_NONE)
)))
return FLOWSPEC_INVALID;
/* Find ASN of the best-match route, for use in next checks */
- u32 asn_b = rta_get_first_asn(rb->attrs);
+ u32 asn_b = rta_get_first_asn(rb.attrs);
if (!asn_b)
return FLOWSPEC_INVALID;
@@ -3376,51 +3466,53 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list *
return FLOWSPEC_INVALID;
/* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */
- TRIE_WALK(tab_ip->trie, subnet, &dst)
+ RT_LOCKED(tab_ip, tip)
{
- net *nc = net_find_valid(tab_ip, &subnet);
- if (!nc)
- continue;
+ TRIE_WALK(tip->trie, subnet, &dst)
+ {
+ net *nc = net_find_valid(tip, &subnet);
+ if (!nc)
+ continue;
- const rte *rc = &nc->routes->rte;
- if (rt_get_source_attr(rc) != RTS_BGP)
- return FLOWSPEC_INVALID;
+ const rte *rc = &nc->routes->rte;
+ if (rt_get_source_attr(rc) != RTS_BGP)
+ RT_RETURN(tip, FLOWSPEC_INVALID);
- if (rta_get_first_asn(rc->attrs) != asn_b)
- return FLOWSPEC_INVALID;
+ if (rta_get_first_asn(rc->attrs) != asn_b)
+ RT_RETURN(tip, FLOWSPEC_INVALID);
+ }
+ TRIE_WALK_END;
}
- TRIE_WALK_END;
return FLOWSPEC_VALID;
}
#endif /* CONFIG_BGP */
-static struct rte_storage *
-rt_flowspec_update_rte(rtable *tab, net *n, rte *r)
+static int
+rt_flowspec_update_rte(rtable *tab, rte *r, rte *new)
{
#ifdef CONFIG_BGP
if (r->generation || (rt_get_source_attr(r) != RTS_BGP))
- return NULL;
+ return 0;
struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req);
if (!bc->base_table)
- return NULL;
+ return 0;
struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto);
enum flowspec_valid old = rt_get_flowspec_valid(r),
- valid = rt_flowspec_check(bc->base_table, tab, n->n.addr, r->attrs, p->is_interior);
+ valid = rt_flowspec_check(bc->base_table, tab, r->net, r->attrs, p->is_interior);
if (old == valid)
- return NULL;
-
- rte new = *r;
- ea_set_attr_u32(&new.attrs, &ea_gen_flowspec_valid, 0, valid);
+ return 0;
- return rte_store(&new, n, tab);
+ *new = *r;
+ ea_set_attr_u32(&new->attrs, &ea_gen_flowspec_valid, 0, valid);
+ return 1;
#else
- return NULL;
+ return 0;
#endif
}
@@ -3455,10 +3547,9 @@ rt_flowspec_resolve_rte(rte *r, struct channel *c)
}
static inline int
-rt_next_hop_update_net(rtable *tab, net *n)
+rt_next_hop_update_net(struct rtable_private *tab, net *n)
{
- struct rte_storage *new;
- int count = 0;
+ uint count = 0;
int is_flow = net_is_flow(n->n.addr);
struct rte_storage *old_best = n->routes;
@@ -3466,49 +3557,90 @@ rt_next_hop_update_net(rtable *tab, net *n)
return 0;
for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next)
- if (is_flow || rta_next_hop_outdated(e->rte.attrs))
- count++;
+ count++;
if (!count)
return 0;
struct rte_multiupdate {
- struct rte_storage *old, *new;
- } *updates = alloca(sizeof(struct rte_multiupdate) * count);
+ struct rte_storage *old, *new_stored;
+ rte new;
+ } *updates = tmp_allocz(sizeof(struct rte_multiupdate) * (count+1));
- int pos = 0;
+ struct rt_pending_export *last_pending = n->last;
+
+ uint pos = 0;
for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next)
- if (is_flow || rta_next_hop_outdated(e->rte.attrs))
- {
- struct rte_storage *new = is_flow
- ? rt_flowspec_update_rte(tab, n, &e->rte)
- : rt_next_hop_update_rte(tab, n, &e->rte);
+ updates[pos++].old = e;
+
+ /* This is an exceptional place where the table can be unlocked while keeping its data:
+ * the reason why this is safe is that NHU must always run from the same
+ * thread as the cleanup routines, therefore the only real problem may arise when
+ * some importer changes this particular net (destination) while NHU
+ * is being computed. Statistically, this should almost never happen. In such
+ * a case, we just drop all the computed changes and do it once again.
+ */
+ RT_UNLOCK(tab);
+
+ uint mod = 0;
+ if (is_flow)
+ for (uint i = 0; i < pos; i++)
+ mod += rt_flowspec_update_rte(RT_PUB(tab), &updates[i].old->rte, &updates[i].new);
- if (!new)
- continue;
+ else
+ for (uint i = 0; i < pos; i++)
+ mod += rt_next_hop_update_rte(&updates[i].old->rte, &updates[i].new);
- /* Call a pre-comparison hook */
- /* Not really an efficient way to compute this */
- if (e->rte.src->owner->rte_recalculate)
- e->rte.src->owner->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte);
+ RT_LOCK(RT_PUB(tab));
- updates[pos++] = (struct rte_multiupdate) {
- .old = e,
- .new = new,
- };
+ if (!mod)
+ return 0;
+
+ /* Something has changed in between, retry NHU. */
+ if (last_pending != n->last)
+ return rt_next_hop_update_net(tab, n);
+
+ /* Now we reconstruct the original linked list */
+ struct rte_storage **nptr = &n->routes;
+ for (uint i = 0; i < pos; i++)
+ {
+ updates[i].old->next = NULL;
- /* Replace the route in the list */
- new->next = e->next;
- *k = e = new;
+ struct rte_storage *put;
+ if (updates[i].new.attrs)
+ put = updates[i].new_stored = rte_store(&updates[i].new, n, tab);
+ else
+ put = updates[i].old;
+
+ *nptr = put;
+ nptr = &put->next;
+ }
+ *nptr = NULL;
+ /* Call the pre-comparison hooks */
+ for (uint i = 0; i < pos; i++)
+ if (updates[i].new_stored)
+ {
/* Get a new ID for the route */
- new->rte.lastmod = current_time();
- new->rte.id = hmap_first_zero(&tab->id_map);
- hmap_set(&tab->id_map, new->rte.id);
+ updates[i].new_stored->rte.lastmod = current_time();
+ updates[i].new_stored->rte.id = hmap_first_zero(&tab->id_map);
+ hmap_set(&tab->id_map, updates[i].new_stored->rte.id);
+
+ /* Call a pre-comparison hook */
+ /* Not really an efficient way to compute this */
+ if (updates[i].old->rte.src->owner->rte_recalculate)
+ updates[i].old->rte.src->owner->rte_recalculate(tab, n, &updates[i].new_stored->rte, &updates[i].old->rte, &old_best->rte);
}
- ASSERT_DIE(pos <= count);
- count = pos;
+#if DEBUGGING
+ {
+ uint t = 0;
+ for (struct rte_storage *e = n->routes; e; e = e->next)
+ t++;
+ ASSERT_DIE(t == pos);
+ ASSERT_DIE(pos == count);
+ }
+#endif
/* Find the new best route */
struct rte_storage **new_best = NULL;
@@ -3519,7 +3651,7 @@ rt_next_hop_update_net(rtable *tab, net *n)
}
/* Relink the new best route to the first position */
- new = *new_best;
+ struct rte_storage *new = *new_best;
if (new != n->routes)
{
*new_best = new->next;
@@ -3527,88 +3659,155 @@ rt_next_hop_update_net(rtable *tab, net *n)
n->routes = new;
}
+ uint total = 0;
/* Announce the changes */
- for (int i=0; i<count; i++)
+ for (uint i=0; i<count; i++)
{
- _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old);
+ if (!updates[i].new_stored)
+ continue;
+
+ _Bool nb = (new->rte.src == updates[i].new.src), ob = (i == 0);
const char *best_indicator[2][2] = {
{ "autoupdated", "autoupdated [-best]" },
{ "autoupdated [+best]", "autoupdated [best]" }
};
- rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]);
- rte_announce(tab, n, updates[i].new, updates[i].old, new, old_best);
+ rt_rte_trace_in(D_ROUTES, updates[i].new.sender->req, &updates[i].new, best_indicator[nb][ob]);
+ rte_announce(tab, n, updates[i].new_stored, updates[i].old, new, old_best);
+
+ total++;
}
- return count;
+ return total;
}
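
The unlock-while-computing trick above is a generic optimistic pattern: snapshot a version marker under the lock, drop the lock, compute, retake the lock, and throw the work away if the marker moved. Distilled into a runnable toy (all names invented; none of this is BIRD API):

    #include <stdio.h>

    struct dest {
      unsigned version;            /* stands in for net->last */
      int value, computed;
    };

    static void lock(void)   { /* models RT_LOCK */ }
    static void unlock(void) { /* models RT_UNLOCK */ }

    static int recompute(struct dest *d)
    {
      unsigned seen = d->version;  /* snapshot under lock */

      unlock();
      int result = d->value * 2;   /* slow part, lock not held */
      lock();

      if (d->version != seen)      /* destination changed meanwhile */
        return recompute(d);       /* drop the result, retry */

      d->computed = result;        /* still consistent, commit */
      return result;
    }

    int main(void)
    {
      struct dest d = { .version = 1, .value = 21 };
      printf("computed %d\n", recompute(&d));
      return 0;
    }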
static void
-rt_next_hop_update(rtable *tab)
+rt_next_hop_update(void *_tab)
{
+ RT_LOCKED((rtable *) _tab, tab)
+ {
+
+ /* If called from an uncork hook, reset the state */
+ if (tab->nhu_corked)
+ {
+ ASSERT_DIE(tab->nhu_state == 0);
+ tab->nhu_state = tab->nhu_corked;
+ tab->nhu_corked = 0;
+ rt_trace(tab, D_STATES, "Next hop updater uncorked");
+ }
+
+ if (!tab->nhu_state)
+ bug("Called NHU event for no reason in table %s", tab->name);
+
+ /* Check corkedness */
+ if (rt_cork_check(tab->nhu_event))
+ {
+ rt_trace(tab, D_STATES, "Next hop updater corked");
+ if ((tab->nhu_state & NHU_RUNNING)
+ && !EMPTY_LIST(tab->exporter.pending)
+ && !tm_active(tab->exporter.export_timer))
+ tm_start(tab->exporter.export_timer, tab->config->export_settle_time);
+
+ tab->nhu_corked = tab->nhu_state;
+ tab->nhu_state = 0;
+ RT_RETURN(tab);
+ }
+
struct fib_iterator *fit = &tab->nhu_fit;
int max_feed = 32;
- if (tab->nhu_state == NHU_CLEAN)
- return;
-
+ /* Initialize a new run */
if (tab->nhu_state == NHU_SCHEDULED)
- {
- FIB_ITERATE_INIT(fit, &tab->fib);
- tab->nhu_state = NHU_RUNNING;
+ {
+ FIB_ITERATE_INIT(fit, &tab->fib);
+ tab->nhu_state = NHU_RUNNING;
- if (tab->flowspec_trie)
- rt_flowspec_reset_trie(tab);
- }
+ if (tab->flowspec_trie)
+ rt_flowspec_reset_trie(tab);
+ }
+ /* Walk the fib one net after another */
FIB_ITERATE_START(&tab->fib, fit, net, n)
{
if (max_feed <= 0)
{
FIB_ITERATE_PUT(fit);
- ev_schedule(tab->rt_event);
- return;
+ ev_schedule(tab->nhu_event);
+ RT_RETURN(tab);
}
+ lp_state lps;
+ lp_save(tmp_linpool, &lps);
max_feed -= rt_next_hop_update_net(tab, n);
+ lp_restore(tmp_linpool, &lps);
}
FIB_ITERATE_END;
+ /* Finished NHU, cleanup */
+ rt_trace(tab, D_EVENTS, "NHU done, scheduling export timer");
+
+ if (!tm_active(tab->exporter.export_timer))
+ tm_start(tab->exporter.export_timer, tab->config->export_settle_time);
+
/* State change:
* NHU_DIRTY -> NHU_SCHEDULED
* NHU_RUNNING -> NHU_CLEAN
*/
- tab->nhu_state &= 1;
+ if ((tab->nhu_state &= NHU_SCHEDULED) == NHU_SCHEDULED)
+ ev_schedule(tab->nhu_event);
- if (tab->nhu_state != NHU_CLEAN)
- ev_schedule(tab->rt_event);
+ rt_unlock_table(tab);
+
+ }
}
+void
+rt_new_default_table(struct symbol *s)
+{
+ for (uint addr_type = 0; addr_type < NET_MAX; addr_type++)
+ if (s == new_config->def_tables[addr_type])
+ {
+ s->table = rt_new_table(s, addr_type);
+ return;
+ }
+
+ bug("Requested an unknown new default table: %s", s->name);
+}
struct rtable_config *
-rt_new_table(struct symbol *s, uint addr_type)
+rt_get_default_table(struct config *cf, uint addr_type)
{
- /* Hack that allows to 'redefine' the master table */
- if ((s->class == SYM_TABLE) &&
- (s->table == new_config->def_tables[addr_type]) &&
- ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
- return s->table;
+ struct symbol *ts = cf->def_tables[addr_type];
+ if (!ts)
+ return NULL;
+ if (!ts->table)
+ rt_new_default_table(ts);
+
+ return ts->table;
+}
+
+struct rtable_config *
+rt_new_table(struct symbol *s, uint addr_type)
+{
struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
- cf_define_symbol(s, SYM_TABLE, table, c);
+ if (s == new_config->def_tables[addr_type])
+ s->table = c;
+ else
+ cf_define_symbol(s, SYM_TABLE, table, c);
+
c->name = s->name;
c->addr_type = addr_type;
c->gc_threshold = 1000;
c->gc_period = (uint) -1; /* set in rt_postconfig() */
- c->min_settle_time = 1 S;
- c->max_settle_time = 20 S;
c->cork_threshold.low = 128;
c->cork_threshold.high = 512;
+ c->debug = new_config->table_debug;
add_tail(&new_config->tables, &c->n);
/* First table of each type is kept as default */
if (! new_config->def_tables[addr_type])
- new_config->def_tables[addr_type] = c;
+ new_config->def_tables[addr_type] = s;
return c;
}
@@ -3622,8 +3821,9 @@ rt_new_table(struct symbol *s, uint addr_type)
* configuration.
*/
void
-rt_lock_table(rtable *r)
+rt_lock_table_priv(struct rtable_private *r, const char *file, uint line)
{
+ rt_trace(r, D_STATES, "Locked at %s:%d", file, line);
r->use_count++;
}
@@ -3636,20 +3836,32 @@ rt_lock_table(rtable *r)
* for deletion by configuration changes.
*/
void
-rt_unlock_table(rtable *r)
+rt_unlock_table_priv(struct rtable_private *r, const char *file, uint line)
{
+ rt_trace(r, D_STATES, "Unlocked at %s:%d", file, line);
if (!--r->use_count && r->deleted)
- {
- struct config *conf = r->deleted;
+ /* Schedule the delete event to finish this up */
+ ev_send(&global_event_list, ev_new_init(r->rp, rt_delete, r));
+}
- /* Delete the routing table by freeing its pool */
- rt_shutdown(r);
- config_del_obstacle(conf);
- }
+static void
+rt_delete(void *tab_)
+{
+ /* We assume that nobody holds the table reference now as use_count is zero.
+ * Even so, the last holder may still hold the lock. Therefore we lock and
+ * unlock it one last time to be sure that nobody is there. */
+ struct rtable_private *tab = RT_LOCK((rtable *) tab_);
+ struct config *conf = tab->deleted;
+
+ RT_UNLOCK(RT_PUB(tab));
+
+ rfree(tab->rp);
+ config_del_obstacle(conf);
}
+
static void
-rt_check_cork_low(rtable *tab)
+rt_check_cork_low(struct rtable_private *tab)
{
if (!tab->cork_active)
return;
@@ -3659,27 +3871,25 @@ rt_check_cork_low(rtable *tab)
tab->cork_active = 0;
rt_cork_release();
- if (config->table_debug)
- log(L_TRACE "%s: Uncorked", tab->name);
+ rt_trace(tab, D_STATES, "Uncorked");
}
}
static void
-rt_check_cork_high(rtable *tab)
+rt_check_cork_high(struct rtable_private *tab)
{
if (!tab->cork_active && tab->exporter.first && (tab->exporter.first->seq + tab->cork_threshold.high <= tab->exporter.next_seq))
{
tab->cork_active = 1;
rt_cork_acquire();
- if (config->table_debug)
- log(L_TRACE "%s: Corked", tab->name);
+ rt_trace(tab, D_STATES, "Corked");
}
}
static int
-rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old)
+rt_reconfigure(struct rtable_private *tab, struct rtable_config *new, struct rtable_config *old)
{
if ((new->addr_type != old->addr_type) ||
(new->sorted != old->sorted) ||
@@ -3687,10 +3897,18 @@ rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old
return 0;
DBG("\t%s: same\n", new->name);
- new->table = tab;
+ new->table = RT_PUB(tab);
tab->name = new->name;
tab->config = new;
+ if (tab->hostcache)
+ tab->hostcache->req.trace_routes = new->debug;
+
+ struct rt_table_export_hook *hook; node *n;
+ WALK_LIST2(hook, n, tab->exporter.e.hooks, h.n)
+ if (hook->h.req->export_one == rt_flowspec_export_one)
+ hook->h.req->trace_routes = new->debug;
+
tab->cork_threshold = new->cork_threshold;
if (new->cork_threshold.high != old->cork_threshold.high)
@@ -3731,19 +3949,32 @@ rt_commit(struct config *new, struct config *old)
{
WALK_LIST(o, old->tables)
{
- rtable *tab = o->table;
+ struct rtable_private *tab = RT_LOCK(o->table);
+
if (tab->deleted)
+ {
+ RT_UNLOCK(tab);
continue;
+ }
r = rt_find_table_config(new, o->name);
if (r && !new->shutdown && rt_reconfigure(tab, r, o))
+ {
+ RT_UNLOCK(tab);
continue;
+ }
DBG("\t%s: deleted\n", o->name);
tab->deleted = old;
config_add_obstacle(old);
rt_lock_table(tab);
+
+ if (tab->hostcache)
+ rt_stop_export(&tab->hostcache->req, NULL);
+
rt_unlock_table(tab);
+
+ RT_UNLOCK(tab);
}
}
@@ -3760,13 +3991,81 @@ rt_commit(struct config *new, struct config *old)
static void
rt_feed_done(struct rt_export_hook *c)
{
- c->event->hook = rt_export_hook;
+ c->event.hook = rt_export_hook;
rt_set_export_state(c, TES_READY);
rt_send_export_event(c);
}
+#define MAX_FEED_BLOCK 1024
+typedef struct {
+ uint cnt, pos;
+ union {
+ struct rt_pending_export *rpe;
+ struct {
+ rte **feed;
+ uint *start;
+ };
+ };
+} rt_feed_block;
+
+static int
+rt_prepare_feed(struct rt_table_export_hook *c, net *n, rt_feed_block *b)
+{
+ if (n->routes)
+ {
+ if (c->h.req->export_bulk)
+ {
+ uint cnt = rte_feed_count(n);
+ if (b->cnt && (b->cnt + cnt > MAX_FEED_BLOCK))
+ return 0;
+
+ if (!b->cnt)
+ {
+ b->feed = tmp_alloc(sizeof(rte *) * MAX(MAX_FEED_BLOCK, cnt));
+ b->start = tmp_alloc(sizeof(uint) * ((cnt >= MAX_FEED_BLOCK) ? 2 : (MAX_FEED_BLOCK + 2 - cnt)));
+ }
+
+ rte_feed_obtain(n, &b->feed[b->cnt], cnt);
+ b->start[b->pos++] = b->cnt;
+ b->cnt += cnt;
+ }
+ else if (b->pos == MAX_FEED_BLOCK)
+ return 0;
+ else
+ {
+ if (!b->pos)
+ b->rpe = tmp_alloc(sizeof(struct rt_pending_export) * MAX_FEED_BLOCK);
+
+ b->rpe[b->pos++] = (struct rt_pending_export) { .new = n->routes, .new_best = n->routes };
+ }
+ }
+
+ rpe_mark_seen_all(&c->h, n->first, NULL);
+ return 1;
+}
+
+static void
+rt_process_feed(struct rt_table_export_hook *c, rt_feed_block *b)
+{
+ if (!b->pos)
+ return;
+
+ if (c->h.req->export_bulk)
+ {
+ b->start[b->pos] = b->cnt;
+ for (uint p = 0; p < b->pos; p++)
+ {
+ rte **feed = &b->feed[b->start[p]];
+ c->h.req->export_bulk(c->h.req, feed[0]->net, NULL, feed, b->start[p+1] - b->start[p]);
+ }
+ }
+ else
+ for (uint p = 0; p < b->pos; p++)
+ c->h.req->export_one(c->h.req, b->rpe[p].new->rte.net, &b->rpe[p]);
+}
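
rt_prepare_feed() and rt_process_feed() split feeding into a collect-under-lock phase and a deliver-outside-lock phase, bounded by MAX_FEED_BLOCK routes per block. The control flow, reduced to a runnable toy (toy types and a tiny block size, not BIRD's):

    #include <stdio.h>

    #define MAX_BLOCK 4              /* stands in for MAX_FEED_BLOCK */

    typedef struct { int items[MAX_BLOCK]; unsigned cnt; } feed_block;

    /* Returns 0 when the block is full and must be flushed first. */
    static int prepare(feed_block *b, int net)
    {
      if (b->cnt == MAX_BLOCK)
        return 0;
      b->items[b->cnt++] = net;
      return 1;
    }

    static void process(feed_block *b) /* runs with the "table lock" dropped */
    {
      for (unsigned i = 0; i < b->cnt; i++)
        printf("export net %d\n", b->items[i]);
      b->cnt = 0;
    }

    int main(void)
    {
      feed_block b = { .cnt = 0 };
      for (int net = 0; net < 10; net++)
        if (!prepare(&b, net))
        {
          /* rt_feed_by_fib() would FIB_ITERATE_PUT + RT_UNLOCK here */
          process(&b);
          prepare(&b, net);          /* re-offer the net that did not fit */
        }
      process(&b);                   /* final partial block */
      return 0;
    }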
+
/**
* rt_feed_by_fib - advertise all routes to a channel by walking a fib
* @c: channel to be fed
@@ -3779,61 +4078,73 @@ rt_feed_done(struct rt_export_hook *c)
static void
rt_feed_by_fib(void *data)
{
- struct rt_export_hook *c = data;
-
+ struct rt_table_export_hook *c = data;
struct fib_iterator *fit = &c->feed_fit;
- int max_feed = 256;
+ rt_feed_block block = {};
- ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING);
+ ASSERT(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING);
- rtable *tab = SKIP_BACK(rtable, exporter, c->table);
+ RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab)
+ {
FIB_ITERATE_START(&tab->fib, fit, net, n)
{
- if (max_feed <= 0)
+ if ((c->h.req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->h.req->addr))
+ {
+ if (atomic_load_explicit(&c->h.export_state, memory_order_acquire) != TES_FEEDING)
+ return;
+
+ if (!rt_prepare_feed(c, n, &block))
{
FIB_ITERATE_PUT(fit);
- rt_send_export_event(c);
+ RT_UNLOCK(tab);
+ rt_process_feed(c, &block);
+ rt_send_export_event(&c->h);
return;
}
-
- if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING)
- return;
-
- if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr))
- max_feed -= rt_feed_net(c, n);
+ }
}
FIB_ITERATE_END;
+ }
- rt_feed_done(c);
+ rt_process_feed(c, &block);
+ rt_feed_done(&c->h);
}
static void
rt_feed_by_trie(void *data)
{
- struct rt_export_hook *c = data;
- rtable *tab = SKIP_BACK(rtable, exporter, c->table);
+ struct rt_table_export_hook *c = data;
+ rt_feed_block block = {};
+
+ RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab)
+ {
ASSERT_DIE(c->walk_state);
struct f_trie_walk_state *ws = c->walk_state;
- int max_feed = 256;
+ ASSERT(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING);
- ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING);
+ do {
+ if (!c->walk_last.type)
+ continue;
- net_addr addr;
- while (trie_walk_next(ws, &addr))
- {
- net *n = net_find(tab, &addr);
+ net *n = net_find(tab, &c->walk_last);
if (!n)
continue;
- if ((max_feed -= rt_feed_net(c, n)) <= 0)
- return;
+ if (atomic_load_explicit(&c->h.export_state, memory_order_acquire) != TES_FEEDING)
+ RT_RETURN(tab);
- if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING)
+ if (!rt_prepare_feed(c, n, &block))
+ {
+ RT_UNLOCK(tab);
+ rt_process_feed(c, &block);
+ rt_send_export_event(&c->h);
return;
+ }
}
+ while (trie_walk_next(ws, &c->walk_last));
rt_unlock_trie(tab, c->walk_lock);
c->walk_lock = NULL;
@@ -3841,70 +4152,59 @@ rt_feed_by_trie(void *data)
mb_free(c->walk_state);
c->walk_state = NULL;
- rt_feed_done(c);
+ c->walk_last.type = 0;
+
+ }
+
+ rt_process_feed(c, &block);
+ rt_feed_done(&c->h);
}
static void
rt_feed_equal(void *data)
{
- struct rt_export_hook *c = data;
- rtable *tab = SKIP_BACK(rtable, exporter, c->table);
+ struct rt_table_export_hook *c = data;
+ rt_feed_block block = {};
+ net *n;
+
+ RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab)
+ {
+ ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING);
+ ASSERT_DIE(c->h.req->addr_mode == TE_ADDR_EQUAL);
- ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING);
- ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL);
+ if (n = net_find(tab, c->h.req->addr))
+ ASSERT_DIE(rt_prepare_feed(c, n, &block));
+ }
- net *n = net_find(tab, c->req->addr);
if (n)
- rt_feed_net(c, n);
+ rt_process_feed(c, &block);
- rt_feed_done(c);
+ rt_feed_done(&c->h);
}
static void
rt_feed_for(void *data)
{
- struct rt_export_hook *c = data;
- rtable *tab = SKIP_BACK(rtable, exporter, c->table);
-
- ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING);
- ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR);
-
- net *n = net_route(tab, c->req->addr);
- if (n)
- rt_feed_net(c, n);
-
- rt_feed_done(c);
-}
-
-static uint
-rt_feed_net(struct rt_export_hook *c, net *n)
-{
- uint count = 0;
+ struct rt_table_export_hook *c = data;
+ rt_feed_block block = {};
+ net *n;
- if (c->req->export_bulk)
+ RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab)
{
- count = rte_feed_count(n);
- if (count)
- {
- rte **feed = alloca(count * sizeof(rte *));
- rte_feed_obtain(n, feed, count);
- c->req->export_bulk(c->req, n->n.addr, NULL, feed, count);
- }
- }
+ ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING);
+ ASSERT_DIE(c->h.req->addr_mode == TE_ADDR_FOR);
- else if (n->routes)
- {
- struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes };
- c->req->export_one(c->req, n->n.addr, &rpe);
- count = 1;
+ if (n = net_route(tab, c->h.req->addr))
+ ASSERT_DIE(rt_prepare_feed(c, n, &block));
}
- for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL))
- rpe_mark_seen(c, rpe);
+ if (n)
+ rt_process_feed(c, &block);
- return count;
+ rt_feed_done(&c->h);
}
+
/*
* Import table
*/
@@ -4030,7 +4330,41 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he)
}
static void
-rt_init_hostcache(rtable *tab)
+hc_notify_dump_req(struct rt_export_request *req)
+{
+ debug(" Table %s (%p)\n", req->name, req);
+}
+
+static void
+hc_notify_export_one(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first)
+{
+ struct hostcache *hc = SKIP_BACK(struct hostcache, req, req);
+
+ /* No interest in this update, mark seen only */
+ if (ev_active(&hc->update) || !trie_match_net(hc->trie, net))
+ {
+ rpe_mark_seen_all(req->hook, first, NULL);
+ return;
+ }
+
+ /* This net may affect some hostentries, check the actual change */
+ rte *o = RTE_VALID_OR_NULL(first->old_best);
+ struct rte_storage *new_best = first->new_best;
+
+ RPE_WALK(first, rpe, NULL)
+ {
+ rpe_mark_seen(req->hook, rpe);
+ new_best = rpe->new_best;
+ }
+
+ /* Yes, something has actually changed. Do the hostcache update. */
+ if (o != RTE_VALID_OR_NULL(new_best))
+ ev_schedule_work(&hc->update);
+}
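
Both hc_notify_export_one() and rt_flowspec_export_one() obey the same consumer contract: every rt_pending_export in the chain must be marked seen exactly once, and only the final new_best matters for the decision. A skeletal export_one handler showing just that contract (compiles only inside the BIRD tree; my_interesting() and my_react() are hypothetical):

    static void
    my_export_one(struct rt_export_request *req, const net_addr *net,
                  struct rt_pending_export *first)
    {
      /* Not interested: the whole chain must still be marked seen. */
      if (!my_interesting(net))
      {
        rpe_mark_seen_all(req->hook, first, NULL);
        return;
      }

      rte *o = RTE_VALID_OR_NULL(first->old_best);
      struct rte_storage *new_best = first->new_best;

      /* Squash the chain: mark each update seen, keep only the last best. */
      RPE_WALK(first, rpe, NULL)
      {
        rpe_mark_seen(req->hook, rpe);
        new_best = rpe->new_best;
      }

      /* React only if the valid best route actually changed. */
      if (o != RTE_VALID_OR_NULL(new_best))
        my_react(net);
    }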
+
+
+static void
+rt_init_hostcache(struct rtable_private *tab)
{
struct hostcache *hc = mb_allocz(tab->rp, sizeof(struct hostcache));
init_list(&hc->hostentries);
@@ -4042,11 +4376,26 @@ rt_init_hostcache(rtable *tab)
hc->lp = lp_new(tab->rp);
hc->trie = f_new_trie(hc->lp, 0);
+ hc->update = (event) {
+ .hook = rt_update_hostcache,
+ .data = tab,
+ };
+
+ hc->req = (struct rt_export_request) {
+ .name = mb_sprintf(tab->rp, "%s.hcu.notifier", tab->name),
+ .list = &global_work_list,
+ .trace_routes = tab->config->debug,
+ .dump_req = hc_notify_dump_req,
+ .export_one = hc_notify_export_one,
+ };
+
+ rt_table_export_start_locked(tab, &hc->req);
+
tab->hostcache = hc;
}
static void
-rt_free_hostcache(rtable *tab)
+rt_free_hostcache(struct rtable_private *tab)
{
struct hostcache *hc = tab->hostcache;
@@ -4068,16 +4417,6 @@ rt_free_hostcache(rtable *tab)
*/
}
-static void
-rt_notify_hostcache(rtable *tab, net *net)
-{
- if (tab->hcu_scheduled)
- return;
-
- if (trie_match_net(tab->hostcache->trie, net->n.addr))
- rt_schedule_hcu(tab);
-}
-
static int
if_local_addr(ip_addr a, struct iface *i)
{
@@ -4108,7 +4447,7 @@ rt_get_igp_metric(const rte *rt)
}
static int
-rt_update_hostentry(rtable *tab, struct hostentry *he)
+rt_update_hostentry(struct rtable_private *tab, struct hostentry *he)
{
ea_list *old_src = he->src;
int direct = 0;
@@ -4174,9 +4513,24 @@ done:
}
static void
-rt_update_hostcache(rtable *tab)
+rt_update_hostcache(void *data)
{
+ rtable **nhu_pending;
+
+ RT_LOCKED((rtable *) data, tab)
+ {
+
struct hostcache *hc = tab->hostcache;
+
+ if (rt_cork_check(&hc->update))
+ {
+ rt_trace(tab, D_STATES, "Hostcache update corked");
+ RT_RETURN(tab);
+ }
+
+ /* Destination schedule map */
+ nhu_pending = tmp_allocz(sizeof(rtable *) * rtable_max_id);
+
struct hostentry *he;
node *n, *x;
@@ -4194,14 +4548,18 @@ rt_update_hostcache(rtable *tab)
}
if (rt_update_hostentry(tab, he))
- rt_schedule_nhu(he->tab);
+ nhu_pending[he->tab->id] = he->tab;
}
+ }
- tab->hcu_scheduled = 0;
+ for (uint i=0; i<rtable_max_id; i++)
+ if (nhu_pending[i])
+ RT_LOCKED(nhu_pending[i], dst)
+ rt_schedule_nhu(dst);
}
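
Collecting the dependent tables into the temporary nhu_pending map and scheduling them only after RT_LOCKED ends keeps the code from ever holding two rtable domain locks at once. The two-phase shape, as a runnable toy (names invented):

    #include <stdio.h>

    #define TABLE_MAX 8              /* stands in for rtable_max_id */

    struct table { unsigned id; int nhu_scheduled; };

    int main(void)
    {
      struct table a = { .id = 2 }, b = { .id = 5 };
      struct table *pending[TABLE_MAX] = { 0 };

      /* Phase 1: own lock held -- only record dependents by id. */
      pending[a.id] = &a;
      pending[b.id] = &b;

      /* Phase 2: own lock dropped -- now each dependent may be locked. */
      for (unsigned i = 0; i < TABLE_MAX; i++)
        if (pending[i])
        {
          pending[i]->nhu_scheduled = 1;   /* models rt_schedule_nhu() */
          printf("scheduled NHU for table %u\n", i);
        }
      return 0;
    }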
static struct hostentry *
-rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
+rt_get_hostentry(struct rtable_private *tab, ip_addr a, ip_addr ll, rtable *dep)
{
struct hostentry *he;
diff --git a/nest/rt.h b/nest/rt.h
index 1a6b7a93..3a8489e8 100644
--- a/nest/rt.h
+++ b/nest/rt.h
@@ -27,6 +27,7 @@ struct protocol;
struct proto;
struct channel;
struct rte_src;
+struct hostcache;
struct symbol;
struct timer;
struct filter;
@@ -52,31 +53,37 @@ struct rt_cork_threshold {
struct rtable_config {
node n;
char *name;
- struct rtable *table;
+ union rtable *table;
struct proto_config *krt_attached; /* Kernel syncer attached to this table */
uint addr_type; /* Type of address data stored in table (NET_*) */
uint gc_threshold; /* Maximum number of operations before GC is run */
uint gc_period; /* Approximate time between two consecutive GC runs */
byte sorted; /* Routes of network are sorted according to rte_better() */
byte trie_used; /* Rtable has attached trie */
- btime min_settle_time; /* Minimum settle time for notifications */
- btime max_settle_time; /* Maximum settle time for notifications */
+ byte debug; /* Whether to log */
btime export_settle_time; /* Delay before exports are announced */
struct rt_cork_threshold cork_threshold; /* Cork threshold values */
};
struct rt_export_hook;
struct rt_export_request;
+struct rt_exporter;
+
+struct rt_exporter_class {
+ void (*start)(struct rt_exporter *, struct rt_export_request *);
+ void (*stop)(struct rt_export_hook *);
+ void (*done)(void *_rt_export_hook);
+};
struct rt_exporter {
+ const struct rt_exporter_class *class;
+ pool *rp;
list hooks; /* Registered route export hooks */
uint addr_type; /* Type of address data exported (NET_*) */
+};
- struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *);
- void (*stop)(struct rt_export_hook *);
- void (*done)(struct rt_export_hook *);
- void (*used)(struct rt_exporter *);
-
+struct rt_table_exporter {
+ struct rt_exporter e;
list pending; /* List of packed struct rt_pending_export */
struct timer *export_timer;
@@ -84,39 +91,50 @@ struct rt_exporter {
u64 next_seq; /* The next export will have this ID */
};
-typedef struct rtable {
- resource r;
- node n; /* Node in list of all tables */
+extern uint rtable_max_id;
+
+DEFINE_DOMAIN(rtable);
+
+/* The public part of rtable structure */
+#define RTABLE_PUBLIC \
+ resource r; \
+ node n; /* Node in list of all tables */ \
+ char *name; /* Name of this table */ \
+ uint addr_type; /* Type of address data stored in table (NET_*) */ \
+ uint id; /* Integer table ID for fast lookup */ \
+ DOMAIN(rtable) lock; /* Lock to take to access the private parts */ \
+ struct rtable_config *config; /* Configuration of this table */ \
+
+/* The complete rtable structure */
+struct rtable_private {
+ /* Once more the public part */
+ RTABLE_PUBLIC;
+
+ /* Here the private items not to be accessed without locking */
pool *rp; /* Resource pool to allocate everything from, including itself */
struct slab *rte_slab; /* Slab to allocate route objects */
struct fib fib;
struct f_trie *trie; /* Trie of prefixes defined in fib */
- char *name; /* Name of this table */
- uint addr_type; /* Type of address data stored in table (NET_*) */
int use_count; /* Number of protocols using this table */
u32 rt_count; /* Number of routes in the table */
list imports; /* Registered route importers */
- struct rt_exporter exporter; /* Exporter API structure */
+ struct rt_table_exporter exporter; /* Exporter API structure */
struct hmap id_map;
struct hostcache *hostcache;
- struct rtable_config *config; /* Configuration of this table */
struct config *deleted; /* Table doesn't exist in current configuration,
* delete as soon as use_count becomes 0 and remove
* obstacle from this routing table.
*/
struct event *rt_event; /* Routing table event */
- struct event *uncork_event; /* Called when uncork happens */
+ struct event *nhu_event; /* Specific event for next hop update */
struct timer *prune_timer; /* Timer for periodic pruning / GC */
btime last_rt_change; /* Last time when route changed */
- btime base_settle_time; /* Start time of rtable settling interval */
btime gc_time; /* Time of last GC */
uint gc_counter; /* Number of operations since last GC */
byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */
byte prune_trie; /* Prune prefix trie during next table prune */
- byte hcu_scheduled; /* Hostcache update is scheduled */
- byte hcu_corked; /* Hostcache update is corked with this state */
byte nhu_state; /* Next Hop Update state */
byte nhu_corked; /* Next Hop Update is corked with this state */
byte export_used; /* Pending Export pruning is scheduled */
@@ -130,25 +148,28 @@ typedef struct rtable {
u32 trie_old_lock_count; /* Old prefix trie locked by walks */
struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */
- list subscribers; /* Subscribers for notifications */
- struct timer *settle_timer; /* Settle time for notifications */
- list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */
struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */
+};
+
+/* The final union private-public rtable structure */
+typedef union rtable {
+ struct {
+ RTABLE_PUBLIC;
+ };
+ struct rtable_private priv;
} rtable;
-struct rt_subscription {
- node n;
- rtable *tab;
- event *event;
- event_list *list;
-};
+#define RT_IS_LOCKED(tab) DOMAIN_IS_LOCKED(rtable, (tab)->lock)
-struct rt_flowspec_link {
- node n;
- rtable *src;
- rtable *dst;
- u32 uc;
-};
+#define RT_LOCK(tab) ({ LOCK_DOMAIN(rtable, (tab)->lock); &(tab)->priv; })
+#define RT_UNLOCK(tab) UNLOCK_DOMAIN(rtable, (tab)->lock)
+#define RT_PRIV(tab) ({ ASSERT_DIE(RT_IS_LOCKED((tab))); &(tab)->priv; })
+#define RT_PUB(tab) SKIP_BACK(rtable, priv, tab)
+
+#define RT_LOCKED(tpub, tpriv) for (struct rtable_private *tpriv = RT_LOCK(tpub); tpriv; RT_UNLOCK(tpriv), (tpriv = NULL))
+#define RT_RETURN(tpriv, ...) do { RT_UNLOCK(tpriv); return __VA_ARGS__; } while (0)
+
+#define RT_PRIV_SAME(tpriv, tpub) (&(tpub)->priv == (tpriv))
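
Because RT_LOCKED expands to a one-iteration for loop, the unlock sits in the loop's step expression: it runs on normal fall-through, but a plain return (or break) inside the body would skip it and leave the domain locked, hence RT_RETURN. A hypothetical caller, assuming only nest/rt.h:

    static u32 my_route_count(rtable *pub)
    {
      RT_LOCKED(pub, tab)              /* tab: struct rtable_private * */
      {
        if (tab->deleted)
          RT_RETURN(tab, 0);           /* early exit must unlock explicitly */

        /* a bare 'return tab->rt_count;' here would leak the lock! */
        RT_RETURN(tab, tab->rt_count);
      }
      return 0;                        /* not reached; keeps compilers happy */
    }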
extern struct rt_cork {
_Atomic uint active;
@@ -184,43 +205,12 @@ static inline int rt_cork_check(event *e)
}
-#define NHU_CLEAN 0
-#define NHU_SCHEDULED 1
-#define NHU_RUNNING 2
-#define NHU_DIRTY 3
-
typedef struct network {
struct rte_storage *routes; /* Available routes for this network */
struct rt_pending_export *first, *last;
struct fib_node n; /* FIB flags reserved for kernel syncer */
} net;
-struct hostcache {
- slab *slab; /* Slab holding all hostentries */
- struct hostentry **hash_table; /* Hash table for hostentries */
- unsigned hash_order, hash_shift;
- unsigned hash_max, hash_min;
- unsigned hash_items;
- linpool *lp; /* Linpool for trie */
- struct f_trie *trie; /* Trie of prefixes that might affect hostentries */
- list hostentries; /* List of all hostentries */
- byte update_hostcache;
-};
-
-struct hostentry {
- node ln;
- ip_addr addr; /* IP address of host, part of key */
- ip_addr link; /* (link-local) IP address of host, used as gw
- if host is directly attached */
- struct rtable *tab; /* Dependent table, part of key */
- struct hostentry *next; /* Next in hash chain */
- unsigned hash_key; /* Hash key */
- unsigned uc; /* Use count */
- ea_list *src; /* Source attributes */
- byte nexthop_linkable; /* Nexthop list is completely non-device */
- u32 igp_metric; /* Chosen route IGP metric */
-};
-
struct rte_storage {
struct rte_storage *next; /* Next in chain */
struct rte rte; /* Route data */
@@ -238,6 +228,8 @@ struct rt_import_request {
char *name;
u8 trace_routes;
+ event_list *list; /* Where to schedule announce events */
+
void (*dump_req)(struct rt_import_request *req);
void (*log_state_change)(struct rt_import_request *req, u8 state);
/* Preimport is called when the @new route is just-to-be inserted, replacing @old.
@@ -269,6 +261,7 @@ struct rt_import_hook {
u8 stale_pruning; /* Last prune started when this value was set at stale_valid */
void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */
+ event announce_event; /* This event announces table updates */
};
struct rt_pending_export {
@@ -314,29 +307,44 @@ struct rt_export_hook {
u32 withdraws_received; /* Number of route withdraws received */
} stats;
+ btime last_state_change; /* Time of last state transition */
+
+ _Atomic u8 export_state; /* Route export state (TES_*, see below) */
+ struct event event; /* Event running all the export operations */
+
+ struct bmap seq_map; /* Keep track of which exports were already processed */
+
+ void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */
+};
+
+struct rt_table_export_hook {
+ union {
+ struct rt_export_hook h;
+ struct { /* Overriding the parent structure beginning */
+ node _n;
+ struct rt_table_exporter *table;
+ };
+ };
+
union {
struct fib_iterator feed_fit; /* Routing table iterator used during feeding */
struct {
struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */
struct f_trie *walk_lock; /* Locked trie for walking */
+ union { /* Last net visited but not processed */
+ net_addr walk_last;
+ net_addr_ip4 walk_last_ip4;
+ net_addr_ip6 walk_last_ip6;
+ };
};
- u32 hash_iter; /* Iterator over hash */
};
- struct bmap seq_map; /* Keep track which exports were already procesed */
-
- struct rt_pending_export * _Atomic last_export;/* Last export processed */
+ struct rt_pending_export *_Atomic last_export;/* Last export processed */
struct rt_pending_export *rpe_next; /* Next pending export to process */
- btime last_state_change; /* Time of last state transition */
-
u8 refeed_pending; /* Refeeding and another refeed is scheduled */
- _Atomic u8 export_state; /* Route export state (TES_*, see below) */
u8 feed_type; /* Which feeding method is used (TFT_*, see below) */
- struct event *event; /* Event running all the export operations */
-
- void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */
};
#define TIS_DOWN 0
@@ -365,7 +373,8 @@ struct rt_export_hook {
#define TFT_HASH 3
void rt_request_import(rtable *tab, struct rt_import_request *req);
-void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req);
+void rt_request_export(rtable *tab, struct rt_export_request *req);
+void rt_request_export_other(struct rt_exporter *tab, struct rt_export_request *req);
void rt_export_once(struct rt_exporter *tab, struct rt_export_request *req);
@@ -382,15 +391,35 @@ void rt_set_export_state(struct rt_export_hook *hook, u8 state);
void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src);
+/*
+ * For table export processing
+ */
+
/* Get next rpe. If src is given, it must match. */
struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src);
+/* Walk all rpe's */
+#define RPE_WALK(first, it, src) \
+ for (struct rt_pending_export *it = (first); it; it = rpe_next(it, (src)))
+
/* Mark the pending export processed */
void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe);
+#define rpe_mark_seen_all(hook, first, src) \
+ RPE_WALK((first), _rpe, (src)) rpe_mark_seen((hook), _rpe)
+
/* Get pending export seen status */
int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe);
+/*
+ * For rt_export_hook and rt_exporter inheritance
+ */
+
+void rt_init_export(struct rt_exporter *re, struct rt_export_hook *hook);
+struct rt_export_hook *rt_alloc_export(struct rt_exporter *re, uint size);
+void rt_export_stopped(struct rt_export_hook *hook);
+void rt_exporter_init(struct rt_exporter *re);
+
/* Types of route announcement, also used as flags */
#define RA_UNDEF 0 /* Undefined RA type */
#define RA_OPTIMAL 1 /* Announcement of optimal route change */
@@ -404,6 +433,49 @@ int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe);
#define RIC_REJECT -1 /* Rejected by protocol */
#define RIC_DROP -2 /* Silently dropped by protocol */
+/*
+ * Next hop update data structures
+ */
+
+#define NHU_CLEAN 0
+#define NHU_SCHEDULED 1
+#define NHU_RUNNING 2
+#define NHU_DIRTY 3
+
+struct hostentry {
+ node ln;
+ ip_addr addr; /* IP address of host, part of key */
+ ip_addr link; /* (link-local) IP address of host, used as gw
+ if host is directly attached */
+ rtable *tab; /* Dependent table, part of key */
+ struct hostentry *next; /* Next in hash chain */
+ unsigned hash_key; /* Hash key */
+ unsigned uc; /* Use count */
+ ea_list *src; /* Source attributes */
+ byte nexthop_linkable; /* Nexthop list is completely non-device */
+ u32 igp_metric; /* Chosen route IGP metric */
+};
+
+struct hostcache {
+ slab *slab; /* Slab holding all hostentries */
+ struct hostentry **hash_table; /* Hash table for hostentries */
+ unsigned hash_order, hash_shift;
+ unsigned hash_max, hash_min;
+ unsigned hash_items;
+ linpool *lp; /* Linpool for trie */
+ struct f_trie *trie; /* Trie of prefixes that might affect hostentries */
+ list hostentries; /* List of all hostentries */
+ event update;
+ struct rt_export_request req; /* Notifier */
+};
+
+struct rt_flowspec_link {
+ rtable *src;
+ rtable *dst;
+ u32 uc;
+ struct rt_export_request req;
+};
+
#define rte_update channel_rte_import
/**
* rte_update - enter a new update to a routing table
@@ -446,32 +518,36 @@ void rt_init(void);
void rt_preconfig(struct config *);
void rt_postconfig(struct config *);
void rt_commit(struct config *new, struct config *old);
-void rt_lock_table(rtable *);
-void rt_unlock_table(rtable *);
-struct f_trie * rt_lock_trie(rtable *tab);
-void rt_unlock_trie(rtable *tab, struct f_trie *trie);
-void rt_subscribe(rtable *tab, struct rt_subscription *s);
-void rt_unsubscribe(struct rt_subscription *s);
+void rt_lock_table_priv(struct rtable_private *, const char *file, uint line);
+void rt_unlock_table_priv(struct rtable_private *, const char *file, uint line);
+static inline void rt_lock_table_pub(rtable *t, const char *file, uint line)
+{ RT_LOCKED(t, tt) rt_lock_table_priv(tt, file, line); }
+static inline void rt_unlock_table_pub(rtable *t, const char *file, uint line)
+{ RT_LOCKED(t, tt) rt_unlock_table_priv(tt, file, line); }
+
+#define rt_lock_table(t) _Generic((t), rtable *: rt_lock_table_pub, \
+ struct rtable_private *: rt_lock_table_priv)((t), __FILE__, __LINE__)
+#define rt_unlock_table(t) _Generic((t), rtable *: rt_unlock_table_pub, \
+ struct rtable_private *: rt_unlock_table_priv)((t), __FILE__, __LINE__)
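A usage sketch: the _Generic dispatch lets the same macro call compile against either pointer type, while __FILE__/__LINE__ record who took the reference:

  /* Sketch only: one macro, two pointer flavours */
  static void
  hold_table(rtable *pub)
  {
    rt_lock_table(pub);                 /* public: enters the lock to bump the use count */
    rt_unlock_table(pub);

    RT_LOCKED(pub, priv)                /* already inside the table lock */
    {
      rt_lock_table(priv);              /* private: direct call, no re-locking */
      rt_unlock_table(priv);
    }
  }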
+
+struct f_trie * rt_lock_trie(struct rtable_private *tab);
+void rt_unlock_trie(struct rtable_private *tab, struct f_trie *trie);
void rt_flowspec_link(rtable *src, rtable *dst);
void rt_flowspec_unlink(rtable *src, rtable *dst);
rtable *rt_setup(pool *, struct rtable_config *);
-static inline void rt_shutdown(rtable *r) { rfree(r->rp); }
-static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); }
-static inline net *net_find_valid(rtable *tab, const net_addr *addr)
+static inline net *net_find(struct rtable_private *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); }
+static inline net *net_find_valid(struct rtable_private *tab, const net_addr *addr)
{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; }
-static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
-net *net_get(rtable *tab, const net_addr *addr);
-net *net_route(rtable *tab, const net_addr *n);
+static inline net *net_get(struct rtable_private *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
+net *net_route(struct rtable_private *tab, const net_addr *n);
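Since the fib accessors now take struct rtable_private *, callers outside the table core must wrap them in RT_LOCKED(), roughly like this:

  /* Sketch only: fib access requires holding the table lock */
  static void
  dump_best_route(rtable *pub, const net_addr *addr)
  {
    RT_LOCKED(pub, tab)
    {
      net *n = net_find_valid(tab, addr);
      if (n)
        rte_dump(n->routes);            /* best route; valid per net_find_valid() */
    }
  }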
int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter);
rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent);
void rt_refresh_begin(struct rt_import_request *);
void rt_refresh_end(struct rt_import_request *);
void rt_modify_stale(rtable *t, struct rt_import_request *);
-void rt_schedule_prune(rtable *t);
+void rt_schedule_prune(struct rtable_private *t);
void rte_dump(struct rte_storage *);
-void rte_free(struct rte_storage *);
-struct rte_storage *rte_store(const rte *, net *net, rtable *);
void rt_dump(rtable *);
void rt_dump_all(void);
void rt_dump_hooks(rtable *);
@@ -481,6 +557,8 @@ void rt_reload_channel_abort(struct channel *c);
void rt_refeed_channel(struct channel *c);
void rt_prune_sync(rtable *t, int all);
struct rtable_config *rt_new_table(struct symbol *s, uint addr_type);
+void rt_new_default_table(struct symbol *s);
+struct rtable_config *rt_get_default_table(struct config *cf, uint addr_type);
static inline int rt_is_ip(rtable *tab)
{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); }
@@ -530,7 +608,7 @@ struct rt_show_data {
void rt_show(struct rt_show_data *);
struct rt_show_data_rtable * rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name);
-struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, struct rtable *t);
+struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t);
/* Value of table definition mode in struct rt_show_data */
#define RSD_TDB_DEFAULT 0 /* no table specified */
@@ -557,7 +635,7 @@ struct hostentry_adata {
};
void
-ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]);
+ea_set_hostentry(ea_list **to, rtable *dep, rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]);
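The VLA-style labels[lnum] parameter carries an MPLS label stack; a call sketch with illustrative label values (ARRAY_SIZE is BIRD's element-count macro):

  /* Sketch only: attach a hostentry with an illustrative label stack */
  static void
  set_recursive_nexthop(ea_list **to, rtable *dep, rtable *tab,
                        ip_addr gw, ip_addr ll)
  {
    u32 labels[2] = { 100, 200 };       /* illustrative MPLS labels */
    ea_set_hostentry(to, dep, tab, gw, ll, ARRAY_SIZE(labels), labels);
  }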
void ea_show_hostentry(const struct adata *ad, byte *buf, uint size);
void ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad);