diff options
-rw-r--r-- | doc/bird.sgml | 9 | ||||
-rw-r--r-- | nest/config.Y | 6 | ||||
-rw-r--r-- | nest/rt-table.c | 63 | ||||
-rw-r--r-- | nest/rt.h | 42 |
4 files changed, 119 insertions, 1 deletions
diff --git a/doc/bird.sgml b/doc/bird.sgml index c48b064c..a4d7cb0c 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -699,6 +699,15 @@ to set options. periods. Default: adaptive, based on number of routing tables in the configuration. From 10 s (with <= 25 routing tables) up to 600 s (with >= 1500 routing tables). + + <tag><label id="rtable-cork-threshold">cork threshold <m/number/ <m/number/</tag> + Too many pending exports may lead to memory bloat. In such cases, + BIRD tries to relieve the memory pressure by pausing some routines until + the queue sizes get low enough. This option allows the user to set the + thresholds; the first value is the low threshold (when to resume), the + second one is the high threshold (when to pause). The higher the + thresholds, the more memory may be used. In most cases, the defaults + should work for you. Default: 128, 512. </descrip> diff --git a/nest/config.Y b/nest/config.Y index 4c758ea3..91147a29 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -125,7 +125,7 @@ CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, AS) CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE) CF_KEYWORDS(CHECK, LINK) -CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD) +CF_KEYWORDS(CORK, SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD) /* For r_args_channel */ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) @@ -231,6 +231,10 @@ table_opt: | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; } | GC THRESHOLD expr { this_table->gc_threshold = $3; } | GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); } + | CORK THRESHOLD expr expr { + if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold."); + 
this_table->cork_threshold.low = $3; + this_table->cork_threshold.high = $4; } ; table_opts: diff --git a/nest/rt-table.c b/nest/rt-table.c index 65fc142a..c1f3098b 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -119,6 +119,8 @@ static linpool *rte_update_pool; list routing_tables; list deleted_routing_tables; +struct rt_cork rt_cork; + /* Data structures for export journal */ #define RT_PENDING_EXPORT_ITEMS (page_size - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export) @@ -144,6 +146,9 @@ static void rt_feed_by_trie(void *); static void rt_feed_equal(void *); static void rt_feed_for(void *); static uint rt_feed_net(struct rt_export_hook *c, net *n); +static void rt_check_cork_low(rtable *tab); +static void rt_check_cork_high(rtable *tab); +static void rt_cork_release_hook(void *); static inline void rt_export_used(struct rt_exporter *); static void rt_export_cleanup(rtable *tab); @@ -1371,6 +1376,8 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage if (tab->exporter.first == NULL) tab->exporter.first = rpe; + rt_check_cork_high(tab); + if (!tm_active(tab->exporter.export_timer)) tm_start(tab->exporter.export_timer, tab->config->export_settle_time); } @@ -2667,6 +2674,8 @@ rt_setup(pool *pp, struct rtable_config *cf) t->last_rt_change = t->gc_time = current_time(); t->exporter.next_seq = 1; + t->cork_threshold = cf->cork_threshold; + t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; if (rt_is_flow(t)) @@ -2692,6 +2701,8 @@ rt_init(void) rte_update_pool = lp_new_default(rt_table_pool); init_list(&routing_tables); init_list(&deleted_routing_tables); + ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release"); + rt_cork.run = (event) { .hook = rt_cork_release_hook }; } @@ -3000,6 +3011,8 @@ rt_export_cleanup(rtable *tab) first = next; } + rt_check_cork_low(tab); + done:; struct rt_import_hook *ih; node *x; _Bool imports_stopped = 0; @@ -3030,6 +3043,16 @@ done:; tm_stop(tab->exporter.export_timer); } 
+static void +rt_cork_release_hook(void *data UNUSED) +{ + do synchronize_rcu(); + while ( + !atomic_load_explicit(&rt_cork.active, memory_order_acquire) && + ev_run_list(&rt_cork.queue) + ); +} + /** * rt_lock_trie - lock a prefix trie of a routing table * @tab: routing table with prefix trie to be locked @@ -3610,6 +3633,8 @@ rt_new_table(struct symbol *s, uint addr_type) c->gc_period = (uint) -1; /* set in rt_postconfig() */ c->min_settle_time = 1 S; c->max_settle_time = 20 S; + c->cork_threshold.low = 128; + c->cork_threshold.high = 512; add_tail(&new_config->tables, &c->n); @@ -3655,6 +3680,36 @@ rt_unlock_table(rtable *r) } } +static void +rt_check_cork_low(rtable *tab) +{ + if (!tab->cork_active) + return; + + if (!tab->exporter.first || (tab->exporter.first->seq + tab->cork_threshold.low > tab->exporter.next_seq)) + { + tab->cork_active = 0; + rt_cork_release(); + + if (config->table_debug) + log(L_TRACE "%s: Uncorked", tab->name); + } +} + +static void +rt_check_cork_high(rtable *tab) +{ + if (!tab->cork_active && tab->exporter.first && (tab->exporter.first->seq + tab->cork_threshold.high <= tab->exporter.next_seq)) + { + tab->cork_active = 1; + rt_cork_acquire(); + + if (config->table_debug) + log(L_TRACE "%s: Corked", tab->name); + } +} + + static int rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) { @@ -3668,6 +3723,14 @@ rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old tab->name = new->name; tab->config = new; + tab->cork_threshold = new->cork_threshold; + + if (new->cork_threshold.high != old->cork_threshold.high) + rt_check_cork_high(tab); + + if (new->cork_threshold.low != old->cork_threshold.low) + rt_check_cork_low(tab); + return 1; } @@ -18,6 +18,7 @@ #include "lib/fib.h" #include "lib/route.h" #include "lib/event.h" +#include "lib/rcu.h" #include <stdatomic.h> @@ -33,6 +34,10 @@ struct f_trie; struct f_trie_walk_state; struct cli; +struct rt_cork_threshold { + u64 low, high; +}; + 
/* * Master Routing Tables. Generally speaking, each of them contains a FIB * with each entry pointing to a list of route entries representing routes @@ -57,6 +62,7 @@ struct rtable_config { btime min_settle_time; /* Minimum settle time for notifications */ btime max_settle_time; /* Maximum settle time for notifications */ btime export_settle_time; /* Delay before exports are announced */ + struct rt_cork_threshold cork_threshold; /* Cork threshold values */ }; struct rt_export_hook; @@ -111,6 +117,8 @@ typedef struct rtable { byte hcu_scheduled; /* Hostcache update is scheduled */ byte nhu_state; /* Next Hop Update state */ byte export_used; /* Pending Export pruning is scheduled */ + byte cork_active; /* Cork has been activated */ + struct rt_cork_threshold cork_threshold; /* Threshold for table cork */ struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ struct f_trie *trie_new; /* New prefix trie defined during pruning */ @@ -139,6 +147,40 @@ struct rt_flowspec_link { u32 uc; }; +extern struct rt_cork { + _Atomic uint active; + event_list queue; + event run; +} rt_cork; + +static inline void rt_cork_acquire(void) +{ + atomic_fetch_add_explicit(&rt_cork.active, 1, memory_order_acq_rel); +} + +static inline void rt_cork_release(void) +{ + if (atomic_fetch_sub_explicit(&rt_cork.active, 1, memory_order_acq_rel) == 1) + { + synchronize_rcu(); + ev_schedule_work(&rt_cork.run); + } +} + +static inline int rt_cork_check(event *e) +{ + rcu_read_lock(); + + int corked = (atomic_load_explicit(&rt_cork.active, memory_order_acquire) > 0); + if (corked) + ev_send(&rt_cork.queue, e); + + rcu_read_unlock(); + + return corked; +} + + #define NHU_CLEAN 0 #define NHU_SCHEDULED 1 #define NHU_RUNNING 2 |