summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Maria Matejka <mq@ucw.cz> 2022-07-28 13:50:59 +0200
committer Maria Matejka <mq@ucw.cz> 2022-08-02 17:34:38 +0200
commit f8500b5943490ca62a1538fead4d8a88ad189c5a (patch)
tree 6b1dfdbd707ea94bd1c0128021bd3ed0b7d49589
parent 058ed711397df75350d905fc135758a6470c0143 (diff)
Route table cork: Indicate whether the export queues are congested.
These routines detect export congestion (as defined by configurable thresholds) and propagate the state to readers. There are no readers yet; they will be added in the following commits.
-rw-r--r-- doc/bird.sgml 9
-rw-r--r-- nest/config.Y 6
-rw-r--r-- nest/rt-table.c 63
-rw-r--r-- nest/rt.h 42
4 files changed, 119 insertions, 1 deletions
diff --git a/doc/bird.sgml b/doc/bird.sgml
index c48b064c..a4d7cb0c 100644
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -699,6 +699,15 @@ to set options.
periods. Default: adaptive, based on number of routing tables in the
configuration. From 10 s (with <= 25 routing tables) up to 600 s (with
>= 1500 routing tables).
+
+ <tag><label id="rtable-cork-threshold">cork threshold <m/number/ <m/number/</tag>
+ Too many pending exports may lead to memory bloat. In such cases,
+ BIRD tries to relieve the memory pressure by pausing some routines until
+ the queue sizes get low enough. This option allows the user to set the
+ thresholds; the first value is the low threshold (when to resume), the
+ second one is the high threshold (when to pause). The higher the
+ thresholds, the more memory may be used. In most cases, the defaults
+ should work for you. Default: 128, 512.
</descrip>
diff --git a/nest/config.Y b/nest/config.Y
index 4c758ea3..91147a29 100644
--- a/nest/config.Y
+++ b/nest/config.Y
@@ -125,7 +125,7 @@ CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US)
CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, AS)
CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE)
CF_KEYWORDS(CHECK, LINK)
-CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD)
+CF_KEYWORDS(CORK, SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD)
/* For r_args_channel */
CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC)
@@ -231,6 +231,10 @@ table_opt:
| MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; }
| GC THRESHOLD expr { this_table->gc_threshold = $3; }
| GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); }
+ | CORK THRESHOLD expr expr {
+ if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold.");
+ this_table->cork_threshold.low = $3;
+ this_table->cork_threshold.high = $4; }
;
table_opts:
diff --git a/nest/rt-table.c b/nest/rt-table.c
index 65fc142a..c1f3098b 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -119,6 +119,8 @@ static linpool *rte_update_pool;
list routing_tables;
list deleted_routing_tables;
+struct rt_cork rt_cork;
+
/* Data structures for export journal */
#define RT_PENDING_EXPORT_ITEMS (page_size - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export)
@@ -144,6 +146,9 @@ static void rt_feed_by_trie(void *);
static void rt_feed_equal(void *);
static void rt_feed_for(void *);
static uint rt_feed_net(struct rt_export_hook *c, net *n);
+static void rt_check_cork_low(rtable *tab);
+static void rt_check_cork_high(rtable *tab);
+static void rt_cork_release_hook(void *);
static inline void rt_export_used(struct rt_exporter *);
static void rt_export_cleanup(rtable *tab);
@@ -1371,6 +1376,8 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage
if (tab->exporter.first == NULL)
tab->exporter.first = rpe;
+ rt_check_cork_high(tab);
+
if (!tm_active(tab->exporter.export_timer))
tm_start(tab->exporter.export_timer, tab->config->export_settle_time);
}
@@ -2667,6 +2674,8 @@ rt_setup(pool *pp, struct rtable_config *cf)
t->last_rt_change = t->gc_time = current_time();
t->exporter.next_seq = 1;
+ t->cork_threshold = cf->cork_threshold;
+
t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS;
if (rt_is_flow(t))
@@ -2692,6 +2701,8 @@ rt_init(void)
rte_update_pool = lp_new_default(rt_table_pool);
init_list(&routing_tables);
init_list(&deleted_routing_tables);
+ ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release");
+ rt_cork.run = (event) { .hook = rt_cork_release_hook };
}
@@ -3000,6 +3011,8 @@ rt_export_cleanup(rtable *tab)
first = next;
}
+ rt_check_cork_low(tab);
+
done:;
struct rt_import_hook *ih; node *x;
_Bool imports_stopped = 0;
@@ -3030,6 +3043,16 @@ done:;
tm_stop(tab->exporter.export_timer);
}
+static void
+rt_cork_release_hook(void *data UNUSED)
+{
+ do synchronize_rcu();
+ while (
+ !atomic_load_explicit(&rt_cork.active, memory_order_acquire) &&
+ ev_run_list(&rt_cork.queue)
+ );
+}
+
/**
* rt_lock_trie - lock a prefix trie of a routing table
* @tab: routing table with prefix trie to be locked
@@ -3610,6 +3633,8 @@ rt_new_table(struct symbol *s, uint addr_type)
c->gc_period = (uint) -1; /* set in rt_postconfig() */
c->min_settle_time = 1 S;
c->max_settle_time = 20 S;
+ c->cork_threshold.low = 128;
+ c->cork_threshold.high = 512;
add_tail(&new_config->tables, &c->n);
@@ -3655,6 +3680,36 @@ rt_unlock_table(rtable *r)
}
}
+static void
+rt_check_cork_low(rtable *tab)
+{
+ if (!tab->cork_active)
+ return;
+
+ if (!tab->exporter.first || (tab->exporter.first->seq + tab->cork_threshold.low > tab->exporter.next_seq))
+ {
+ tab->cork_active = 0;
+ rt_cork_release();
+
+ if (config->table_debug)
+ log(L_TRACE "%s: Uncorked", tab->name);
+ }
+}
+
+static void
+rt_check_cork_high(rtable *tab)
+{
+ if (!tab->cork_active && tab->exporter.first && (tab->exporter.first->seq + tab->cork_threshold.high <= tab->exporter.next_seq))
+ {
+ tab->cork_active = 1;
+ rt_cork_acquire();
+
+ if (config->table_debug)
+ log(L_TRACE "%s: Corked", tab->name);
+ }
+}
+
+
static int
rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old)
{
@@ -3668,6 +3723,14 @@ rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old
tab->name = new->name;
tab->config = new;
+ tab->cork_threshold = new->cork_threshold;
+
+ if (new->cork_threshold.high != old->cork_threshold.high)
+ rt_check_cork_high(tab);
+
+ if (new->cork_threshold.low != old->cork_threshold.low)
+ rt_check_cork_low(tab);
+
return 1;
}
diff --git a/nest/rt.h b/nest/rt.h
index 5acbded6..58c6ec33 100644
--- a/nest/rt.h
+++ b/nest/rt.h
@@ -18,6 +18,7 @@
#include "lib/fib.h"
#include "lib/route.h"
#include "lib/event.h"
+#include "lib/rcu.h"
#include <stdatomic.h>
@@ -33,6 +34,10 @@ struct f_trie;
struct f_trie_walk_state;
struct cli;
+struct rt_cork_threshold {
+ u64 low, high;
+};
+
/*
* Master Routing Tables. Generally speaking, each of them contains a FIB
* with each entry pointing to a list of route entries representing routes
@@ -57,6 +62,7 @@ struct rtable_config {
btime min_settle_time; /* Minimum settle time for notifications */
btime max_settle_time; /* Maximum settle time for notifications */
btime export_settle_time; /* Delay before exports are announced */
+ struct rt_cork_threshold cork_threshold; /* Cork threshold values */
};
struct rt_export_hook;
@@ -111,6 +117,8 @@ typedef struct rtable {
byte hcu_scheduled; /* Hostcache update is scheduled */
byte nhu_state; /* Next Hop Update state */
byte export_used; /* Pending Export pruning is scheduled */
+ byte cork_active; /* Cork has been activated */
+ struct rt_cork_threshold cork_threshold; /* Threshold for table cork */
struct fib_iterator prune_fit; /* Rtable prune FIB iterator */
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
struct f_trie *trie_new; /* New prefix trie defined during pruning */
@@ -139,6 +147,40 @@ struct rt_flowspec_link {
u32 uc;
};
+extern struct rt_cork {
+ _Atomic uint active;
+ event_list queue;
+ event run;
+} rt_cork;
+
+static inline void rt_cork_acquire(void)
+{
+ atomic_fetch_add_explicit(&rt_cork.active, 1, memory_order_acq_rel);
+}
+
+static inline void rt_cork_release(void)
+{
+ if (atomic_fetch_sub_explicit(&rt_cork.active, 1, memory_order_acq_rel) == 1)
+ {
+ synchronize_rcu();
+ ev_schedule_work(&rt_cork.run);
+ }
+}
+
+static inline int rt_cork_check(event *e)
+{
+ rcu_read_lock();
+
+ int corked = (atomic_load_explicit(&rt_cork.active, memory_order_acquire) > 0);
+ if (corked)
+ ev_send(&rt_cork.queue, e);
+
+ rcu_read_unlock();
+
+ return corked;
+}
+
+
#define NHU_CLEAN 0
#define NHU_SCHEDULED 1
#define NHU_RUNNING 2