summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOndrej Zajicek (work) <santiago@crfreenet.org>2022-02-04 05:34:02 +0100
committerOndrej Zajicek (work) <santiago@crfreenet.org>2022-02-06 23:27:13 +0100
commit5a89edc6fd0b03716ccb77084e8d1a1910f52ab0 (patch)
tree9f2c066f1aab2cfbffb0d695e1050b8b39f20e0d
parentde6318f70a06854a324b436b3a4cca0b3d60024b (diff)
Nest: Implement locking of prefix tries during walks
The prune loop may may rebuild the prefix trie and therefore invalidate walk state for asynchronous walks (used in 'show route in' cmd). Fix it by adding locking that keeps the old trie in memory until current walks are done. In future this could be improved by rebuilding trie walk states (by lookup for last found prefix) after the prefix trie rebuild.
-rw-r--r--nest/route.h6
-rw-r--r--nest/rt-show.c9
-rw-r--r--nest/rt-table.c82
3 files changed, 95 insertions, 2 deletions
diff --git a/nest/route.h b/nest/route.h
index c4e0d2c6..7930058a 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -191,6 +191,9 @@ typedef struct rtable {
struct fib_iterator prune_fit; /* Rtable prune FIB iterator */
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
struct f_trie *trie_new; /* New prefix trie defined during pruning */
+ struct f_trie *trie_old; /* Old prefix trie waiting to be freed */
+ u32 trie_lock_count; /* Prefix trie locked by walks */
+ u32 trie_old_lock_count; /* Old prefix trie locked by walks */
list subscribers; /* Subscribers for notifications */
struct timer *settle_timer; /* Settle time for notifications */
@@ -332,6 +335,8 @@ void rt_preconfig(struct config *);
void rt_commit(struct config *new, struct config *old);
void rt_lock_table(rtable *);
void rt_unlock_table(rtable *);
+struct f_trie * rt_lock_trie(rtable *tab);
+void rt_unlock_trie(rtable *tab, struct f_trie *trie);
void rt_subscribe(rtable *tab, struct rt_subscription *s);
void rt_unsubscribe(struct rt_subscription *s);
void rt_flowspec_link(rtable *src, rtable *dst);
@@ -405,6 +410,7 @@ struct rt_show_data {
struct rt_show_data_rtable *last_table; /* Last table in output */
struct fib_iterator fit; /* Iterator over networks in table */
struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */
+ struct f_trie *walk_lock; /* Locked trie for walking */
int verbose, tables_defined_by;
const struct filter *filter;
struct proto *show_protocol;
diff --git a/nest/rt-show.c b/nest/rt-show.c
index ea1f918c..f8b7ba51 100644
--- a/nest/rt-show.c
+++ b/nest/rt-show.c
@@ -222,6 +222,9 @@ rt_show_cleanup(struct cli *c)
if (d->table_open && !d->trie_walk)
fit_get(&d->tab->table->fib, &d->fit);
+ if (d->walk_lock)
+ rt_unlock_trie(d->tab->table, d->walk_lock);
+
/* Unlock referenced tables */
WALK_LIST(tab, d->tables)
rt_unlock_table(tab->table);
@@ -257,7 +260,10 @@ rt_show_cont(struct cli *c)
d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state));
if (d->trie_walk)
+ {
+ d->walk_lock = rt_lock_trie(tab);
trie_walk_init(d->walk_state, tab->trie, d->addr);
+ }
else
FIB_ITERATE_INIT(&d->fit, &tab->fib);
@@ -288,6 +294,9 @@ rt_show_cont(struct cli *c)
if (!--max)
return;
}
+
+ rt_unlock_trie(tab, d->walk_lock);
+ d->walk_lock = NULL;
}
else
{
diff --git a/nest/rt-table.c b/nest/rt-table.c
index ac14aec9..a10979e6 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -2471,7 +2471,19 @@ again:
if (tab->trie_new)
{
/* Finish prefix trie pruning */
- rfree(tab->trie->lp);
+
+ if (!tab->trie_lock_count)
+ {
+ rfree(tab->trie->lp);
+ }
+ else
+ {
+ ASSERT(!tab->trie_old);
+ tab->trie_old = tab->trie;
+ tab->trie_old_lock_count = tab->trie_lock_count;
+ tab->trie_lock_count = 0;
+ }
+
tab->trie = tab->trie_new;
tab->trie_new = NULL;
tab->prune_trie = 0;
@@ -2479,7 +2491,7 @@ again:
else
{
/* Schedule prefix trie pruning */
- if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
+ if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries)))
{
/* state change 0->1, 2->3 */
tab->prune_state |= 1;
@@ -2504,6 +2516,72 @@ again:
return;
}
+/**
+ * rt_lock_trie - lock a prefix trie of a routing table
+ * @tab: routing table with prefix trie to be locked
+ *
+ * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state
+ * structures. Therefore, asynchronous walks should lock the prefix trie using
+ * this function. That allows the prune loop to rebuild the trie, but postpones
+ * its freeing until all walks are done (unlocked by rt_unlock_trie()).
+ *
+ * Return a current trie that will be locked, the value should be passed back to
+ * rt_unlock_trie() for unlocking.
+ *
+ */
+struct f_trie *
+rt_lock_trie(rtable *tab)
+{
+ ASSERT(tab->trie);
+
+ tab->trie_lock_count++;
+ return tab->trie;
+}
+
+/**
+ * rt_unlock_trie - unlock a prefix trie of a routing table
+ * @tab: routing table with prefix trie to be locked
+ * @trie: value returned by matching rt_lock_trie()
+ *
+ * Done for trie locked by rt_lock_trie() after walk over the trie is done.
+ * It may free the trie and schedule next trie pruning.
+ */
+void
+rt_unlock_trie(rtable *tab, struct f_trie *trie)
+{
+ ASSERT(trie);
+
+ if (trie == tab->trie)
+ {
+ /* Unlock the current prefix trie */
+ ASSERT(tab->trie_lock_count);
+ tab->trie_lock_count--;
+ }
+ else if (trie == tab->trie_old)
+ {
+ /* Unlock the old prefix trie */
+ ASSERT(tab->trie_old_lock_count);
+ tab->trie_old_lock_count--;
+
+ /* Free old prefix trie that is no longer needed */
+ if (!tab->trie_old_lock_count)
+ {
+ rfree(tab->trie_old->lp);
+ tab->trie_old = NULL;
+
+ /* Kick prefix trie pruning that was postponed */
+ if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
+ {
+ tab->prune_trie = 1;
+ rt_schedule_prune(tab);
+ }
+ }
+ }
+ else
+ log(L_BUG "Invalid arg to rt_unlock_trie()");
+}
+
+
void
rt_preconfig(struct config *c)
{