diff options
author | Ondrej Zajicek <santiago@crfreenet.org> | 2022-06-04 17:34:57 +0200 |
---|---|---|
committer | Ondrej Zajicek <santiago@crfreenet.org> | 2022-06-04 17:34:57 +0200 |
commit | a8a3d95be5db1a8a7d5a17e2eb8e233417b1d8c7 (patch) | |
tree | c91bd47eaf319a54af76ef74b1a1283621fc2676 | |
parent | 9e60b500c76945ccdea94d3a67e7acfde5e3f969 (diff) |
Nest: Improve GC strategy for rtables
Use timer (configurable as 'gc period') to schedule routing table
GC/pruning to ensure that prune is done on time but not too often.
Randomize GC timers to avoid concentration of GC events from different
tables in one loop cycle.
Fix a bug that caused minimum inter-GC interval be 5 us instead of 5 s.
Make default 'gc period' adaptive based on number of routing tables,
from 10 s for small setups to 600 s for large ones.
In marge multi-table RS setup, the patch improved time of flushing
a downed peer from 20-30 min to <2 min and removed 40s latencies.
-rw-r--r-- | conf/conf.c | 1 | ||||
-rw-r--r-- | doc/bird.sgml | 15 | ||||
-rw-r--r-- | nest/config.Y | 4 | ||||
-rw-r--r-- | nest/route.h | 8 | ||||
-rw-r--r-- | nest/rt-table.c | 54 |
5 files changed, 70 insertions, 12 deletions
diff --git a/conf/conf.c b/conf/conf.c index a2b01667..025c040e 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -140,6 +140,7 @@ config_parse(struct config *c) protos_preconfig(c); rt_preconfig(c); cf_parse(); + rt_postconfig(c); if (EMPTY_LIST(c->protos)) cf_error("No protocol is specified in the config file"); diff --git a/doc/bird.sgml b/doc/bird.sgml index 1580facd..326fc7a8 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -684,6 +684,21 @@ to set options. limit to the settle time from the initial ROA table change even if there are consecutive updates gradually renewing the settle time. Default: 20 s. + + <tag><label id="rtable-gc-threshold">gc threshold <m/number/</tag> + Specify a minimum amount of removed networks that triggers a garbage + collection (GC) cycle. Default: 1000. + + <tag><label id="rtable-gc-period">gc period <m/time/</tag> + Specify a period of time between consecutive GC cycles. When there is a + significant amount of route withdraws, GC cycles are executed repeatedly + with given period time (with some random factor). When there is just + small amount of changes, GC cycles are not executed. In extensive route + server setups, running GC on hundreds of full BGP routing tables can + take significant amount of time, therefore they should use higher GC + periods. Default: adaptive, based on number of routing tables in the + configuration. From 10 s (with <= 25 routing tables) up to 600 s (with + >= 1500 routing tables). </descrip> diff --git a/nest/config.Y b/nest/config.Y index 72bc7930..17999422 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -125,7 +125,7 @@ CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE) CF_KEYWORDS(CHECK, LINK) -CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME) +CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD) /* For r_args_channel */ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) @@ -229,6 +229,8 @@ table_opt: } | MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; } | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; } + | GC THRESHOLD expr { this_table->gc_threshold = $3; } + | GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); } ; table_opts: diff --git a/nest/route.h b/nest/route.h index 7930058a..395139a3 100644 --- a/nest/route.h +++ b/nest/route.h @@ -148,8 +148,8 @@ struct rtable_config { struct rtable *table; struct proto_config *krt_attached; /* Kernel syncer attached to this table */ uint addr_type; /* Type of address data stored in table (NET_*) */ - int gc_max_ops; /* Maximum number of operations before GC is run */ - int gc_min_time; /* Minimum time between two consecutive GC runs */ + uint gc_threshold; /* Maximum number of operations before GC is run */ + uint gc_period; /* Approximate time between two consecutive GC runs */ byte sorted; /* Routes of network are sorted according to rte_better() */ byte internal; /* Internal table of a protocol */ byte trie_used; /* Rtable has attached trie */ @@ -180,10 +180,11 @@ typedef struct rtable { * obstacle from this routing table. */ struct event *rt_event; /* Routing table event */ + struct timer *prune_timer; /* Timer for periodic pruning / GC */ btime last_rt_change; /* Last time when route changed */ btime base_settle_time; /* Start time of rtable settling interval */ btime gc_time; /* Time of last GC */ - int gc_counter; /* Number of operations since last GC */ + uint gc_counter; /* Number of operations since last GC */ byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ byte prune_trie; /* Prune prefix trie during next table prune */ byte hcu_scheduled; /* Hostcache update is scheduled */ @@ -332,6 +333,7 @@ struct config; void rt_init(void); void rt_preconfig(struct config *); +void rt_postconfig(struct config *); void rt_commit(struct config *new, struct config *old); void rt_lock_table(rtable *); void rt_unlock_table(rtable *); diff --git a/nest/rt-table.c b/nest/rt-table.c index c3563171..2cf55421 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -124,6 +124,7 @@ static void rt_next_hop_update(rtable *tab); static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); static void rt_flowspec_notify(rtable *tab, net *net); +static void rt_kick_prune_timer(rtable *tab); static void @@ -1641,9 +1642,8 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best); if (!net->routes && - (table->gc_counter++ >= table->config->gc_max_ops) && - (table->gc_time + table->config->gc_min_time <= current_time())) - rt_schedule_prune(table); + (table->gc_counter++ >= table->config->gc_threshold)) + rt_kick_prune_timer(table); if (old_ok && p->rte_remove) p->rte_remove(net, old); @@ -2098,6 +2098,29 @@ rt_event(void *ptr) } +static void +rt_prune_timer(timer *t) +{ + rtable *tab = t->data; + + if (tab->gc_counter >= tab->config->gc_threshold) + rt_schedule_prune(tab); +} + +static void +rt_kick_prune_timer(rtable *tab) +{ + /* Return if prune is already scheduled */ + if (tm_active(tab->prune_timer) || (tab->prune_state & 1)) + return; + + /* Randomize GC period to +/- 50% */ + btime gc_period = tab->config->gc_period; + gc_period = (gc_period / 2) + (random_u32() % (uint) gc_period); + tm_start(tab->prune_timer, gc_period); +} + + static inline btime rt_settled_time(rtable *tab) { @@ -2333,6 +2356,7 @@ rt_setup(pool *pp, struct rtable_config *cf) hmap_set(&t->id_map, 0); t->rt_event = ev_new_init(p, rt_event, t); + t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); t->last_rt_change = t->gc_time = current_time(); if (rt_is_flow(t)) @@ -2403,6 +2427,9 @@ rt_prune_table(rtable *tab) FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; + tab->gc_counter = 0; + tab->gc_time = current_time(); + if (tab->prune_trie) { /* Init prefix trie pruning */ @@ -2462,9 +2489,6 @@ again: fib_check(&tab->fib); #endif - tab->gc_counter = 0; - tab->gc_time = current_time(); - /* state change 2->0, 3->1 */ tab->prune_state &= 1; @@ -2591,6 +2615,20 @@ rt_preconfig(struct config *c) rt_new_table(cf_get_symbol("master6"), NET_IP6); } +void +rt_postconfig(struct config *c) +{ + uint num_tables = list_length(&c->tables); + btime def_gc_period = 400 MS * num_tables; + def_gc_period = MAX(def_gc_period, 10 S); + def_gc_period = MIN(def_gc_period, 600 S); + + struct rtable_config *rc; + WALK_LIST(rc, c->tables) + if (rc->gc_period == (uint) -1) + rc->gc_period = (uint) def_gc_period; +} + /* * Some functions for handing internal next hop updates @@ -2999,8 +3037,8 @@ rt_new_table(struct symbol *s, uint addr_type) cf_define_symbol(s, SYM_TABLE, table, c); c->name = s->name; c->addr_type = addr_type; - c->gc_max_ops = 1000; - c->gc_min_time = 5; + c->gc_threshold = 1000; + c->gc_period = (uint) -1; /* set in rt_postconfig() */ c->min_settle_time = 1 S; c->max_settle_time = 20 S; |