path: root/nest/rt-attr.c
Diffstat (limited to 'nest/rt-attr.c')
-rw-r--r--  nest/rt-attr.c | 394
1 file changed, 298 insertions(+), 96 deletions(-)
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index c630aa95..357cd216 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -54,6 +54,7 @@
#include "lib/hash.h"
#include "lib/idm.h"
#include "lib/resource.h"
+#include "lib/rcu.h"
#include "lib/string.h"
#include <stddef.h>
@@ -61,7 +62,6 @@
const adata null_adata; /* adata of length 0 */
const char * const rta_src_names[RTS_MAX] = {
- [RTS_DUMMY] = "",
[RTS_STATIC] = "static",
[RTS_INHERIT] = "inherit",
[RTS_DEVICE] = "device",
@@ -86,7 +86,14 @@ const char * rta_dest_names[RTD_MAX] = {
[RTD_PROHIBIT] = "prohibited",
};
+DEFINE_DOMAIN(attrs);
+static DOMAIN(attrs) src_domain;
+
+#define SRC_LOCK LOCK_DOMAIN(attrs, src_domain)
+#define SRC_UNLOCK UNLOCK_DOMAIN(attrs, src_domain)
+
pool *rta_pool;
+pool *src_pool;
static slab *rta_slab_[4];
static slab *nexthop_slab_[4];
@@ -97,72 +104,151 @@ static struct idm src_ids;
/* rte source hash */
-#define RSH_KEY(n) n->proto, n->private_id
+#define RSH_KEY(n) n->private_id
#define RSH_NEXT(n) n->next
-#define RSH_EQ(p1,n1,p2,n2) p1 == p2 && n1 == n2
-#define RSH_FN(p,n) p->hash_key ^ u32_hash(n)
+#define RSH_EQ(n1,n2) n1 == n2
+#define RSH_FN(n) u32_hash(n)
#define RSH_REHASH rte_src_rehash
#define RSH_PARAMS /2, *2, 1, 1, 8, 20
-#define RSH_INIT_ORDER 6
-
-static HASH(struct rte_src) src_hash;
+#define RSH_INIT_ORDER 2
static void
rte_src_init(void)
{
- rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));
-
- idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
+ src_domain = DOMAIN_NEW(attrs, "Route sources");
+ src_pool = rp_new(&root_pool, &main_birdloop, "Route sources");
+ rte_src_slab = sl_new(src_pool, sizeof(struct rte_src));
- HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER);
+ idm_init(&src_ids, src_pool, SRC_ID_INIT_SIZE);
}
-
HASH_DEFINE_REHASH_FN(RSH, struct rte_src)
-struct rte_src *
-rt_find_source(struct proto *p, u32 id)
+static struct rte_src *
+rt_find_source(struct rte_owner *p, u32 id)
{
- return HASH_FIND(src_hash, RSH, p, id);
+ return HASH_FIND(p->hash, RSH, id);
}
struct rte_src *
-rt_get_source(struct proto *p, u32 id)
+rt_get_source_o(struct rte_owner *p, u32 id)
{
+ if (p->stop)
+ bug("Stopping route owner asked for another source.");
+
struct rte_src *src = rt_find_source(p, id);
if (src)
+ {
+ UNUSED u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel);
return src;
+ }
+ SRC_LOCK;
src = sl_allocz(rte_src_slab);
- src->proto = p;
+ src->owner = p;
src->private_id = id;
src->global_id = idm_alloc(&src_ids);
- src->uc = 0;
- HASH_INSERT2(src_hash, RSH, rta_pool, src);
+ atomic_store_explicit(&src->uc, 1, memory_order_release);
+ p->uc++;
+
+ HASH_INSERT2(p->hash, RSH, src_pool, src);
+ if (config->table_debug)
+ log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now",
+ p->name, src->private_id, src->global_id, p->uc);
+
+ SRC_UNLOCK;
return src;
}
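
rt_get_source_o() now has a lockless fast path: an already-known source costs a single atomic increment, and SRC_LOCK is taken only to allocate and publish a new rte_src. A minimal standalone sketch of that shape (hypothetical names; a pthread mutex stands in for the DOMAIN lock and a plain chain for the per-owner hash):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <pthread.h>

    /* Toy stand-in for struct rte_src: an id plus an atomic use count */
    struct src {
      unsigned id;
      _Atomic uint64_t uc;
      struct src *_Atomic next;
    };

    static struct src *_Atomic src_head;
    static pthread_mutex_t src_lock = PTHREAD_MUTEX_INITIALIZER;

    struct src *get_source(unsigned id)
    {
      /* Fast path, no lock: find the source and bump its use count */
      for (struct src *s = atomic_load_explicit(&src_head, memory_order_acquire);
           s;
           s = atomic_load_explicit(&s->next, memory_order_acquire))
        if (s->id == id)
        {
          atomic_fetch_add_explicit(&s->uc, 1, memory_order_acq_rel);
          return s;
        }

      /* Slow path: allocate and publish under the lock. The BIRD code above
       * skips a re-check here, presumably because each rte_owner allocates its
       * own sources from a single thread; a general-purpose version would look
       * the chain up again after locking. */
      pthread_mutex_lock(&src_lock);
      struct src *s = calloc(1, sizeof(*s));
      s->id = id;
      atomic_store_explicit(&s->uc, 1, memory_order_relaxed);
      atomic_store_explicit(&s->next,
                            atomic_load_explicit(&src_head, memory_order_relaxed),
                            memory_order_relaxed);
      /* Release store: lockless readers that see the new head also see its fields */
      atomic_store_explicit(&src_head, s, memory_order_release);
      pthread_mutex_unlock(&src_lock);
      return s;
    }
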
+static inline void
+rt_done_sources(struct rte_owner *o)
+{
+ if (o->stop->list)
+ ev_send(o->stop->list, o->stop);
+ else
+ ev_send(o->list, o->stop);
+}
+
void
-rt_prune_sources(void)
+rt_prune_sources(void *data)
{
- HASH_WALK_FILTER(src_hash, next, src, sp)
+ struct rte_owner *o = data;
+
+ HASH_WALK_FILTER(o->hash, next, src, sp)
{
- if (src->uc == 0)
+ u64 uc;
+ while ((uc = atomic_load_explicit(&src->uc, memory_order_acquire)) >> RTE_SRC_PU_SHIFT)
+ ;
+
+ if (uc == 0)
{
- HASH_DO_REMOVE(src_hash, RSH, sp);
+ o->uc--;
+
+ HASH_DO_REMOVE(o->hash, RSH, sp);
+
+ SRC_LOCK;
idm_free(&src_ids, src->global_id);
sl_free(rte_src_slab, src);
+ SRC_UNLOCK;
}
}
HASH_WALK_FILTER_END;
- HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool);
+ SRC_LOCK;
+ HASH_MAY_RESIZE_DOWN(o->hash, RSH, src_pool);
+
+ if (o->stop && !o->uc)
+ {
+ rfree(o->prune);
+ SRC_UNLOCK;
+
+ if (config->table_debug)
+ log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name);
+
+ rt_done_sources(o);
+ }
+ else
+ SRC_UNLOCK;
+}
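
The spin loop above waits for the bits above RTE_SRC_PU_SHIFT to drain: src->uc is a single 64-bit atomic whose high bits flag operations still in flight, so the pruner never acts on a use count that is about to change under it. A hedged sketch of such a split counter (the shift value, helper names and the exact unlock protocol are assumptions; only the waiting loop mirrors the code above):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Hypothetical layout: low bits hold the stable use count, bits from
     * PU_SHIFT up count unlocks that are announced but not yet finished. */
    #define PU_SHIFT     44
    #define IN_PROGRESS  (1ULL << PU_SHIFT)

    static _Atomic uint64_t uc;

    void lock_src(void)
    {
      atomic_fetch_add_explicit(&uc, 1, memory_order_acq_rel);
    }

    void unlock_src(void)
    {
      /* Announce the unlock first so a concurrent pruner waits for us ... */
      atomic_fetch_add_explicit(&uc, IN_PROGRESS, memory_order_acq_rel);

      /* ... do whatever must happen before the count may reach zero ... */

      /* ... then retire the announcement and one reference in a single step. */
      atomic_fetch_sub_explicit(&uc, IN_PROGRESS + 1, memory_order_acq_rel);
    }

    /* Pruner: wait until no unlock is in flight, then test the stable count. */
    int src_is_free(void)
    {
      uint64_t v;
      while ((v = atomic_load_explicit(&uc, memory_order_acquire)) >> PU_SHIFT)
        ;
      return v == 0;
    }
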
+
+void
+rt_init_sources(struct rte_owner *o, const char *name, event_list *list)
+{
+ SRC_LOCK;
+ HASH_INIT(o->hash, src_pool, RSH_INIT_ORDER);
+ o->hash_key = random_u32();
+ o->uc = 0;
+ o->name = name;
+ o->prune = ev_new_init(src_pool, rt_prune_sources, o);
+ o->stop = NULL;
+ o->list = list;
+ SRC_UNLOCK;
}
+void
+rt_destroy_sources(struct rte_owner *o, event *done)
+{
+ o->stop = done;
+
+ if (!o->uc)
+ {
+ if (config->table_debug)
+ log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name);
+
+ SRC_LOCK;
+ rfree(o->prune);
+ SRC_UNLOCK;
+
+ rt_done_sources(o);
+ }
+ else
+ if (config->table_debug)
+ log(L_TRACE "Source owner %s destroy requested. Remaining %u rte_src's to prune.", o->name, o->uc);
+}
/*
* Multipath Next Hop
@@ -541,8 +627,8 @@ ea_walk(struct ea_walk_state *s, uint id, uint max)
* by calling ea_find() to find the attribute, extracting its value or returning
* a provided default if no such attribute is present.
*/
-int
-ea_get_int(ea_list *e, unsigned id, int def)
+uintptr_t
+ea_get_int(ea_list *e, unsigned id, uintptr_t def)
{
eattr *a = ea_find(e, id);
if (!a)
@@ -1081,21 +1167,28 @@ ea_append(ea_list *to, ea_list *what)
* rta's
*/
-static uint rta_cache_count;
-static uint rta_cache_size = 32;
-static uint rta_cache_limit;
-static uint rta_cache_mask;
-static rta **rta_hash_table;
+static DOMAIN(attrs) attrs_domain;
-static void
-rta_alloc_hash(void)
+#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain)
+#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain)
+
+struct rta_cache {
+ u32 count;
+ u32 size;
+ u32 limit;
+ u32 mask;
+ rta * _Atomic table[0];
+} * _Atomic rta_cache;
+// rta_aux, rta_cache = { .size = ATOMIC_VAR_INIT(32), };
+
+static struct rta_cache *
+rta_alloc_hash(u32 size)
{
- rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size);
- if (rta_cache_size < 32768)
- rta_cache_limit = rta_cache_size * 2;
- else
- rta_cache_limit = ~0;
- rta_cache_mask = rta_cache_size - 1;
+ struct rta_cache *c = mb_allocz(rta_pool, sizeof(struct rta_cache) + sizeof(rta * _Atomic) * size);
+ c->size = size;
+ c->limit = (size >> 20) ? (~0U) : (size * 2);
+ c->mask = size - 1;
+ return c;
}
static inline uint
@@ -1104,13 +1197,14 @@ rta_hash(rta *a)
u64 h;
mem_hash_init(&h);
#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f));
- MIX(src);
+#define BMIX(f) mem_hash_mix_num(&h, a->f);
MIX(hostentry);
MIX(from);
MIX(igp_metric);
- MIX(source);
- MIX(scope);
- MIX(dest);
+ BMIX(source);
+ BMIX(scope);
+ BMIX(dest);
+ MIX(pref);
#undef MIX
return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs);
@@ -1119,8 +1213,7 @@ rta_hash(rta *a)
static inline int
rta_same(rta *x, rta *y)
{
- return (x->src == y->src &&
- x->source == y->source &&
+ return (x->source == y->source &&
x->scope == y->scope &&
x->dest == y->dest &&
x->igp_metric == y->igp_metric &&
@@ -1149,34 +1242,88 @@ rta_copy(rta *o)
}
static inline void
-rta_insert(rta *r)
+rta_insert(rta *r, struct rta_cache *c)
{
- uint h = r->hash_key & rta_cache_mask;
- r->next = rta_hash_table[h];
- if (r->next)
- r->next->pprev = &r->next;
- r->pprev = &rta_hash_table[h];
- rta_hash_table[h] = r;
+ uint h = r->hash_key & c->mask;
+ rta *next = atomic_load_explicit(&c->table[h], memory_order_relaxed);
+
+ atomic_store_explicit(&r->next, next, memory_order_relaxed);
+ r->pprev = &c->table[h];
+
+ if (next)
+ next->pprev = &r->next;
+
+ /* This store MUST be the last and MUST have release order for thread-safety */
+ atomic_store_explicit(&c->table[h], r, memory_order_release);
}
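
The comment on the final store is the central invariant of the lockless cache: rta_find() below loads the bucket head with acquire ordering, so the matching release store here guarantees readers observe a fully initialized rta. Reduced to a writer/reader pair (toy types, not BIRD API):

    #include <stdatomic.h>

    struct node { int payload; struct node *_Atomic next; };
    static struct node *_Atomic bucket;

    void publish(struct node *n, int payload)
    {
      n->payload = payload;                    /* A: initialize first        */
      atomic_store_explicit(&n->next,
          atomic_load_explicit(&bucket, memory_order_relaxed),
          memory_order_relaxed);
      atomic_store_explicit(&bucket, n, memory_order_release);  /* B: publish last */
    }

    int read_first(void)
    {
      struct node *n = atomic_load_explicit(&bucket, memory_order_acquire);
      return n ? n->payload : -1;              /* acquire pairs with B, so A is visible */
    }
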
static void
-rta_rehash(void)
+rta_rehash(struct rta_cache *c)
{
- uint ohs = rta_cache_size;
- uint h;
- rta *r, *n;
- rta **oht = rta_hash_table;
-
- rta_cache_size = 2*rta_cache_size;
- DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size);
- rta_alloc_hash();
- for(h=0; h<ohs; h++)
- for(r=oht[h]; r; r=n)
+ u32 os = c->size;
+
+ struct rta_cache *nc = rta_alloc_hash(os * 2);
+ nc->count = c->count;
+
+ /* First we simply copy every chain to both new locations */
+ for (u32 h = 0; h < os; h++)
+ {
+ rta *r = atomic_load_explicit(&c->table[h], memory_order_relaxed);
+ atomic_store_explicit(&nc->table[h], r, memory_order_relaxed);
+ atomic_store_explicit(&nc->table[h + os], r, memory_order_relaxed);
+ }
+
+ /* Then we exchange the hashes; release semantics forces the previous code to be already done */
+ atomic_store_explicit(&rta_cache, nc, memory_order_release);
+
+ /* And now we pass through both chains and filter them */
+ for (u32 h = 0; h < c->size; h++)
+ {
+ rta * _Atomic * ap = &nc->table[h];
+ rta * _Atomic * bp = &nc->table[h + os];
+
+ rta *r = atomic_load_explicit(ap, memory_order_relaxed);
+ ASSERT_DIE(r == atomic_load_explicit(bp, memory_order_relaxed));
+
+ while (r)
+ {
+ if (r->hash_key & os)
{
- n = r->next;
- rta_insert(r);
+ r->pprev = bp;
+ atomic_store_explicit(bp, r, memory_order_release);
+ bp = &r->next;
}
- mb_free(oht);
+ else
+ {
+ r->pprev = ap;
+ atomic_store_explicit(ap, r, memory_order_release);
+ ap = &r->next;
+ }
+
+ r = atomic_load_explicit(&r->next, memory_order_acquire);
+ }
+
+ atomic_store_explicit(ap, NULL, memory_order_release);
+ atomic_store_explicit(bp, NULL, memory_order_release);
+ }
+
+ synchronize_rcu();
+ mb_free(c);
+}
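
The rehash grows the table while readers keep walking it: every old chain is first made reachable from both of its future buckets, the new table is published with a release store, the chains are then split in place, and the old table is freed only after an RCU grace period. A hedged standalone sketch of the same scheme, with liburcu standing in for BIRD's lib/rcu.h (all names hypothetical; reader threads must call rcu_register_thread() first):

    #include <urcu.h>          /* liburcu: rcu_read_lock(), synchronize_rcu(), ... */
    #include <stdatomic.h>
    #include <stdlib.h>

    struct node { unsigned hash; struct node *_Atomic next; };

    struct table {
      unsigned size;                               /* power of two */
      struct node *_Atomic bucket[];
    };

    static struct table *_Atomic cur;

    /* Reader: may run concurrently with grow(); nodes are never freed in this
     * sketch, so only the table pointer itself needs the read-side section. */
    struct node *find(unsigned hash)
    {
      rcu_read_lock();
      struct table *t = atomic_load_explicit(&cur, memory_order_acquire);
      struct node *n = atomic_load_explicit(&t->bucket[hash & (t->size - 1)],
                                            memory_order_acquire);
      while (n && n->hash != hash)
        n = atomic_load_explicit(&n->next, memory_order_acquire);
      rcu_read_unlock();
      return n;
    }

    /* Writer (serialized by the caller, as rta_rehash() is by RTA_LOCK) */
    void grow(void)
    {
      struct table *o = atomic_load_explicit(&cur, memory_order_relaxed);
      struct table *n = calloc(1, sizeof(*n)
                                  + 2 * o->size * sizeof(struct node *_Atomic));
      n->size = 2 * o->size;

      /* 1. Make every old chain reachable from both of its future buckets ... */
      for (unsigned h = 0; h < o->size; h++)
      {
        struct node *head = atomic_load_explicit(&o->bucket[h], memory_order_relaxed);
        atomic_store_explicit(&n->bucket[h], head, memory_order_relaxed);
        atomic_store_explicit(&n->bucket[h + o->size], head, memory_order_relaxed);
      }

      /* 2. ... publish the new table; readers may now use either one ... */
      atomic_store_explicit(&cur, n, memory_order_release);

      /* 3. ... split the chains in place (omitted -- see rta_rehash() above) ... */

      /* 4. ... and free the old table only once no reader can still hold it. */
      synchronize_rcu();
      free(o);
    }
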
+
+static rta *
+rta_find(rta *o, u32 h, struct rta_cache *c)
+{
+ rta *r = NULL;
+
+ for (r = atomic_load_explicit(&c->table[h & c->mask], memory_order_acquire); r; r = atomic_load_explicit(&r->next, memory_order_acquire))
+ if (r->hash_key == h && rta_same(r, o))
+ {
+ atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel);
+ return r;
+ }
+
+ return NULL;
}
/**
@@ -1198,43 +1345,89 @@ rta_lookup(rta *o)
rta *r;
uint h;
- ASSERT(!(o->aflags & RTAF_CACHED));
+ ASSERT(!o->cached);
if (o->eattrs)
ea_normalize(o->eattrs);
h = rta_hash(o);
- for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next)
- if (r->hash_key == h && rta_same(r, o))
- return rta_clone(r);
+ /* Lockless lookup */
+ rcu_read_lock();
+ r = rta_find(o, h, atomic_load_explicit(&rta_cache, memory_order_acquire));
+ rcu_read_unlock();
+
+ if (r)
+ return r;
+
+ RTA_LOCK;
+
+ /* Locked lookup to avoid duplicates if possible */
+ struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
+ r = rta_find(o, h, c);
+ if (r)
+ {
+ RTA_UNLOCK;
+ return r;
+ }
+
+ /* Store the rta */
r = rta_copy(o);
r->hash_key = h;
- r->aflags = RTAF_CACHED;
- rt_lock_source(r->src);
+ r->cached = 1;
rt_lock_hostentry(r->hostentry);
- rta_insert(r);
+ rta_insert(r, c);
- if (++rta_cache_count > rta_cache_limit)
- rta_rehash();
+ if (++c->count > c->limit)
+ rta_rehash(c);
+ RTA_UNLOCK;
return r;
}
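
rta_lookup() is a double-checked lookup: an optimistic probe without the lock, then the same probe repeated under RTA_LOCK so two threads racing on the same rta cannot both insert it. The same shape boiled down to a one-slot cache (hypothetical names):

    #include <stdatomic.h>
    #include <stdlib.h>
    #include <pthread.h>

    struct big { int whatever; };

    static struct big *_Atomic cached;
    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

    struct big *get_big(void)
    {
      /* 1. Optimistic lockless probe (rta_lookup() does this under rcu_read_lock) */
      struct big *b = atomic_load_explicit(&cached, memory_order_acquire);
      if (b)
        return b;

      pthread_mutex_lock(&big_lock);

      /* 2. Re-check: another thread may have won the race while we were locking */
      b = atomic_load_explicit(&cached, memory_order_acquire);
      if (!b)
      {
        /* 3. Build and publish exactly one instance */
        b = calloc(1, sizeof(*b));
        b->whatever = 42;
        atomic_store_explicit(&cached, b, memory_order_release);
      }

      pthread_mutex_unlock(&big_lock);
      return b;
    }
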
void
rta__free(rta *a)
{
- ASSERT(rta_cache_count && (a->aflags & RTAF_CACHED));
- rta_cache_count--;
- *a->pprev = a->next;
- if (a->next)
- a->next->pprev = a->pprev;
+ ASSERT(a->cached);
+
+ RTA_LOCK;
+ struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
+
+ if (atomic_load_explicit(&a->uc, memory_order_acquire))
+ {
+ /* Acquired inbetween */
+ RTA_UNLOCK;
+ return;
+ }
+
+ /* Relink the forward pointer */
+ rta *next = atomic_load_explicit(&a->next, memory_order_acquire);
+ atomic_store_explicit(a->pprev, next, memory_order_release);
+
+ /* Relink the backwards pointer */
+ if (next)
+ next->pprev = a->pprev;
+
+ /* Wait until nobody knows about us */
+ synchronize_rcu();
+
+ if (atomic_load_explicit(&a->uc, memory_order_acquire))
+ {
+ /* Acquired inbetween, relink back */
+ rta_insert(a, c);
+ RTA_UNLOCK;
+ return;
+ }
+
+ /* Cleared to free the memory */
rt_unlock_hostentry(a->hostentry);
- rt_unlock_source(a->src);
if (a->nh.next)
nexthop_free(a->nh.next);
ea_free(a->eattrs);
- a->aflags = 0; /* Poison the entry */
+ a->cached = 0;
+ c->count--;
sl_free(rta_slab(a), a);
+
+ RTA_UNLOCK;
}
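
rta__free() has to assume a reader is taking a new reference at any moment: the entry is unlinked under the lock, one RCU grace period is waited out, and only if the use count is still zero does the memory go back to the slab; otherwise the entry is quietly re-inserted. A hedged sketch of that dance, again with liburcu and made-up names:

    #include <urcu.h>
    #include <stdatomic.h>
    #include <stdlib.h>
    #include <pthread.h>

    struct entry {
      _Atomic unsigned uc;
      struct entry *_Atomic next;
      struct entry *_Atomic *pprev;   /* points at whatever points at us */
    };

    static struct entry *_Atomic bucket;
    static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

    static void insert(struct entry *e)    /* same shape as rta_insert() */
    {
      struct entry *next = atomic_load_explicit(&bucket, memory_order_relaxed);
      atomic_store_explicit(&e->next, next, memory_order_relaxed);
      e->pprev = &bucket;
      if (next)
        next->pprev = &e->next;
      atomic_store_explicit(&bucket, e, memory_order_release);
    }

    void entry_free(struct entry *e)       /* called when uc has dropped to zero */
    {
      pthread_mutex_lock(&cache_lock);

      if (atomic_load_explicit(&e->uc, memory_order_acquire))
      {
        pthread_mutex_unlock(&cache_lock); /* revived before we got the lock */
        return;
      }

      /* Unlink from the chain */
      struct entry *next = atomic_load_explicit(&e->next, memory_order_acquire);
      atomic_store_explicit(e->pprev, next, memory_order_release);
      if (next)
        next->pprev = e->pprev;

      /* Readers that found e before the unlink may still be about to bump uc */
      synchronize_rcu();

      if (atomic_load_explicit(&e->uc, memory_order_acquire))
        insert(e);                         /* somebody grabbed it: put it back */
      else
        free(e);

      pthread_mutex_unlock(&cache_lock);
    }
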
rta *
@@ -1248,8 +1441,7 @@ rta_do_cow(rta *o, linpool *lp)
memcpy(*nhn, nho, nexthop_size(nho));
nhn = &((*nhn)->next);
}
- r->aflags = 0;
- r->uc = 0;
+ rta_uncache(r);
return r;
}
@@ -1260,22 +1452,22 @@ rta_do_cow(rta *o, linpool *lp)
* This function takes a &rta and dumps its contents to the debug output.
*/
void
-rta_dump(rta *a)
+rta_dump(const rta *a)
{
- static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE",
+ static char *rts[] = { "", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE",
"RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP",
"RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1",
"RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" };
static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" };
- debug("p=%s uc=%d %s %s%s h=%04x",
- a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope),
+ debug("pref=%d uc=%d %s %s%s h=%04x",
+ a->pref, a->uc, rts[a->source], ip_scope_text(a->scope),
rtd[a->dest], a->hash_key);
- if (!(a->aflags & RTAF_CACHED))
+ if (!a->cached)
debug(" !CACHED");
debug(" <-%I", a->from);
if (a->dest == RTD_UNICAST)
- for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
+ for (const struct nexthop *nh = &(a->nh); nh; nh = nh->next)
{
if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw);
if (nh->labels) debug(" L %d", nh->label[0]);
@@ -1302,19 +1494,27 @@ rta_dump_all(void)
rta *a;
uint h;
- debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit);
- for(h=0; h<rta_cache_size; h++)
- for(a=rta_hash_table[h]; a; a=a->next)
+ RTA_LOCK;
+
+ struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
+
+ debug("Route attribute cache (%d entries, rehash at %d):\n", c->count, c->limit);
+ for(h=0; h<c->size; h++)
+ for(a = atomic_load_explicit(&c->table[h], memory_order_acquire);
+ a;
+ a = atomic_load_explicit(&a->next, memory_order_acquire))
{
debug("%p ", a);
rta_dump(a);
debug("\n");
}
debug("\n");
+
+ RTA_UNLOCK;
}
void
-rta_show(struct cli *c, rta *a)
+rta_show(struct cli *c, const rta *a)
{
cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope));
@@ -1332,7 +1532,9 @@ rta_show(struct cli *c, rta *a)
void
rta_init(void)
{
- rta_pool = rp_new(&root_pool, "Attributes");
+ attrs_domain = DOMAIN_NEW(attrs, "Attributes");
+
+ rta_pool = rp_new(&root_pool, &main_birdloop, "Attributes");
rta_slab_[0] = sl_new(rta_pool, sizeof(rta));
rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32));
@@ -1344,7 +1546,7 @@ rta_init(void)
nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2);
nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
- rta_alloc_hash();
+ atomic_store_explicit(&rta_cache, rta_alloc_hash(32), memory_order_relaxed);
rte_src_init();
}