summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2022-06-04 17:34:57 +0200
committerOndrej Zajicek <santiago@crfreenet.org>2022-06-04 17:34:57 +0200
commita8a3d95be5db1a8a7d5a17e2eb8e233417b1d8c7 (patch)
treec91bd47eaf319a54af76ef74b1a1283621fc2676
parent9e60b500c76945ccdea94d3a67e7acfde5e3f969 (diff)
Nest: Improve GC strategy for rtables
Use timer (configurable as 'gc period') to schedule routing table GC/pruning to ensure that prune is done on time but not too often. Randomize GC timers to avoid concentration of GC events from different tables in one loop cycle. Fix a bug that caused minimum inter-GC interval be 5 us instead of 5 s. Make default 'gc period' adaptive based on number of routing tables, from 10 s for small setups to 600 s for large ones. In marge multi-table RS setup, the patch improved time of flushing a downed peer from 20-30 min to <2 min and removed 40s latencies.
-rw-r--r--conf/conf.c1
-rw-r--r--doc/bird.sgml15
-rw-r--r--nest/config.Y4
-rw-r--r--nest/route.h8
-rw-r--r--nest/rt-table.c54
5 files changed, 70 insertions, 12 deletions
diff --git a/conf/conf.c b/conf/conf.c
index a2b01667..025c040e 100644
--- a/conf/conf.c
+++ b/conf/conf.c
@@ -140,6 +140,7 @@ config_parse(struct config *c)
protos_preconfig(c);
rt_preconfig(c);
cf_parse();
+ rt_postconfig(c);
if (EMPTY_LIST(c->protos))
cf_error("No protocol is specified in the config file");
diff --git a/doc/bird.sgml b/doc/bird.sgml
index 1580facd..326fc7a8 100644
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -684,6 +684,21 @@ to set options.
limit to the settle time from the initial ROA table change even if
there are consecutive updates gradually renewing the settle time.
Default: 20 s.
+
+ <tag><label id="rtable-gc-threshold">gc threshold <m/number/</tag>
+ Specify a minimum amount of removed networks that triggers a garbage
+ collection (GC) cycle. Default: 1000.
+
+ <tag><label id="rtable-gc-period">gc period <m/time/</tag>
+ Specify a period of time between consecutive GC cycles. When there is a
+ significant amount of route withdraws, GC cycles are executed repeatedly
+ with given period time (with some random factor). When there is just
+ small amount of changes, GC cycles are not executed. In extensive route
+ server setups, running GC on hundreds of full BGP routing tables can
+ take significant amount of time, therefore they should use higher GC
+ periods. Default: adaptive, based on number of routing tables in the
+ configuration. From 10 s (with <= 25 routing tables) up to 600 s (with
+ >= 1500 routing tables).
</descrip>
diff --git a/nest/config.Y b/nest/config.Y
index 72bc7930..17999422 100644
--- a/nest/config.Y
+++ b/nest/config.Y
@@ -125,7 +125,7 @@ CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US)
CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS)
CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE)
CF_KEYWORDS(CHECK, LINK)
-CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME)
+CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD)
/* For r_args_channel */
CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC)
@@ -229,6 +229,8 @@ table_opt:
}
| MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; }
| MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; }
+ | GC THRESHOLD expr { this_table->gc_threshold = $3; }
+ | GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); }
;
table_opts:
diff --git a/nest/route.h b/nest/route.h
index 7930058a..395139a3 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -148,8 +148,8 @@ struct rtable_config {
struct rtable *table;
struct proto_config *krt_attached; /* Kernel syncer attached to this table */
uint addr_type; /* Type of address data stored in table (NET_*) */
- int gc_max_ops; /* Maximum number of operations before GC is run */
- int gc_min_time; /* Minimum time between two consecutive GC runs */
+ uint gc_threshold; /* Maximum number of operations before GC is run */
+ uint gc_period; /* Approximate time between two consecutive GC runs */
byte sorted; /* Routes of network are sorted according to rte_better() */
byte internal; /* Internal table of a protocol */
byte trie_used; /* Rtable has attached trie */
@@ -180,10 +180,11 @@ typedef struct rtable {
* obstacle from this routing table.
*/
struct event *rt_event; /* Routing table event */
+ struct timer *prune_timer; /* Timer for periodic pruning / GC */
btime last_rt_change; /* Last time when route changed */
btime base_settle_time; /* Start time of rtable settling interval */
btime gc_time; /* Time of last GC */
- int gc_counter; /* Number of operations since last GC */
+ uint gc_counter; /* Number of operations since last GC */
byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */
byte prune_trie; /* Prune prefix trie during next table prune */
byte hcu_scheduled; /* Hostcache update is scheduled */
@@ -332,6 +333,7 @@ struct config;
void rt_init(void);
void rt_preconfig(struct config *);
+void rt_postconfig(struct config *);
void rt_commit(struct config *new, struct config *old);
void rt_lock_table(rtable *);
void rt_unlock_table(rtable *);
diff --git a/nest/rt-table.c b/nest/rt-table.c
index c3563171..2cf55421 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -124,6 +124,7 @@ static void rt_next_hop_update(rtable *tab);
static inline void rt_prune_table(rtable *tab);
static inline void rt_schedule_notify(rtable *tab);
static void rt_flowspec_notify(rtable *tab, net *net);
+static void rt_kick_prune_timer(rtable *tab);
static void
@@ -1641,9 +1642,8 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best);
if (!net->routes &&
- (table->gc_counter++ >= table->config->gc_max_ops) &&
- (table->gc_time + table->config->gc_min_time <= current_time()))
- rt_schedule_prune(table);
+ (table->gc_counter++ >= table->config->gc_threshold))
+ rt_kick_prune_timer(table);
if (old_ok && p->rte_remove)
p->rte_remove(net, old);
@@ -2098,6 +2098,29 @@ rt_event(void *ptr)
}
+static void
+rt_prune_timer(timer *t)
+{
+ rtable *tab = t->data;
+
+ if (tab->gc_counter >= tab->config->gc_threshold)
+ rt_schedule_prune(tab);
+}
+
+static void
+rt_kick_prune_timer(rtable *tab)
+{
+ /* Return if prune is already scheduled */
+ if (tm_active(tab->prune_timer) || (tab->prune_state & 1))
+ return;
+
+ /* Randomize GC period to +/- 50% */
+ btime gc_period = tab->config->gc_period;
+ gc_period = (gc_period / 2) + (random_u32() % (uint) gc_period);
+ tm_start(tab->prune_timer, gc_period);
+}
+
+
static inline btime
rt_settled_time(rtable *tab)
{
@@ -2333,6 +2356,7 @@ rt_setup(pool *pp, struct rtable_config *cf)
hmap_set(&t->id_map, 0);
t->rt_event = ev_new_init(p, rt_event, t);
+ t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0);
t->last_rt_change = t->gc_time = current_time();
if (rt_is_flow(t))
@@ -2403,6 +2427,9 @@ rt_prune_table(rtable *tab)
FIB_ITERATE_INIT(fit, &tab->fib);
tab->prune_state = 2;
+ tab->gc_counter = 0;
+ tab->gc_time = current_time();
+
if (tab->prune_trie)
{
/* Init prefix trie pruning */
@@ -2462,9 +2489,6 @@ again:
fib_check(&tab->fib);
#endif
- tab->gc_counter = 0;
- tab->gc_time = current_time();
-
/* state change 2->0, 3->1 */
tab->prune_state &= 1;
@@ -2591,6 +2615,20 @@ rt_preconfig(struct config *c)
rt_new_table(cf_get_symbol("master6"), NET_IP6);
}
+void
+rt_postconfig(struct config *c)
+{
+ uint num_tables = list_length(&c->tables);
+ btime def_gc_period = 400 MS * num_tables;
+ def_gc_period = MAX(def_gc_period, 10 S);
+ def_gc_period = MIN(def_gc_period, 600 S);
+
+ struct rtable_config *rc;
+ WALK_LIST(rc, c->tables)
+ if (rc->gc_period == (uint) -1)
+ rc->gc_period = (uint) def_gc_period;
+}
+
/*
* Some functions for handing internal next hop updates
@@ -2999,8 +3037,8 @@ rt_new_table(struct symbol *s, uint addr_type)
cf_define_symbol(s, SYM_TABLE, table, c);
c->name = s->name;
c->addr_type = addr_type;
- c->gc_max_ops = 1000;
- c->gc_min_time = 5;
+ c->gc_threshold = 1000;
+ c->gc_period = (uint) -1; /* set in rt_postconfig() */
c->min_settle_time = 1 S;
c->max_settle_time = 20 S;