Diffstat (limited to 'nest/rt-attr.c')
-rw-r--r-- | nest/rt-attr.c | 394
1 file changed, 298 insertions(+), 96 deletions(-)
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index c630aa95..357cd216 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -54,6 +54,7 @@
 #include "lib/hash.h"
 #include "lib/idm.h"
 #include "lib/resource.h"
+#include "lib/rcu.h"
 #include "lib/string.h"
 
 #include <stddef.h>
@@ -61,7 +62,6 @@
 const adata null_adata;		/* adata of length 0 */
 
 const char * const rta_src_names[RTS_MAX] = {
-  [RTS_DUMMY] = "",
   [RTS_STATIC] = "static",
   [RTS_INHERIT] = "inherit",
   [RTS_DEVICE] = "device",
@@ -86,7 +86,14 @@ const char * rta_dest_names[RTD_MAX] = {
   [RTD_PROHIBIT] = "prohibited",
 };
 
+DEFINE_DOMAIN(attrs);
+static DOMAIN(attrs) src_domain;
+
+#define SRC_LOCK	LOCK_DOMAIN(attrs, src_domain)
+#define SRC_UNLOCK	UNLOCK_DOMAIN(attrs, src_domain)
+
 pool *rta_pool;
+pool *src_pool;
 
 static slab *rta_slab_[4];
 static slab *nexthop_slab_[4];
@@ -97,72 +104,151 @@ static struct idm src_ids;
 
 /* rte source hash */
 
-#define RSH_KEY(n)		n->proto, n->private_id
+#define RSH_KEY(n)		n->private_id
 #define RSH_NEXT(n)		n->next
-#define RSH_EQ(p1,n1,p2,n2)	p1 == p2 && n1 == n2
-#define RSH_FN(p,n)		p->hash_key ^ u32_hash(n)
+#define RSH_EQ(n1,n2)		n1 == n2
+#define RSH_FN(n)		u32_hash(n)
 
 #define RSH_REHASH		rte_src_rehash
 #define RSH_PARAMS		/2, *2, 1, 1, 8, 20
-#define RSH_INIT_ORDER		6
-
-static HASH(struct rte_src) src_hash;
+#define RSH_INIT_ORDER		2
 
 static void
 rte_src_init(void)
 {
-  rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));
-
-  idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
+  src_domain = DOMAIN_NEW(attrs, "Route sources");
+  src_pool = rp_new(&root_pool, &main_birdloop, "Route sources");
+  rte_src_slab = sl_new(src_pool, sizeof(struct rte_src));
 
-  HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER);
+  idm_init(&src_ids, src_pool, SRC_ID_INIT_SIZE);
 }
 
-
 HASH_DEFINE_REHASH_FN(RSH, struct rte_src)
 
-struct rte_src *
-rt_find_source(struct proto *p, u32 id)
+static struct rte_src *
+rt_find_source(struct rte_owner *p, u32 id)
 {
-  return HASH_FIND(src_hash, RSH, p, id);
+  return HASH_FIND(p->hash, RSH, id);
 }
 
 struct rte_src *
-rt_get_source(struct proto *p, u32 id)
+rt_get_source_o(struct rte_owner *p, u32 id)
 {
+  if (p->stop)
+    bug("Stopping route owner asked for another source.");
+
   struct rte_src *src = rt_find_source(p, id);
 
   if (src)
+  {
+    UNUSED u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel);
     return src;
+  }
 
+  SRC_LOCK;
   src = sl_allocz(rte_src_slab);
-  src->proto = p;
+  src->owner = p;
   src->private_id = id;
   src->global_id = idm_alloc(&src_ids);
-  src->uc = 0;
 
-  HASH_INSERT2(src_hash, RSH, rta_pool, src);
+  atomic_store_explicit(&src->uc, 1, memory_order_release);
+  p->uc++;
+
+  HASH_INSERT2(p->hash, RSH, src_pool, src);
+  if (config->table_debug)
+    log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now",
+	p->name, src->private_id, src->global_id, p->uc);
+
+  SRC_UNLOCK;
 
   return src;
 }
 
+static inline void
+rt_done_sources(struct rte_owner *o)
+{
+  if (o->stop->list)
+    ev_send(o->stop->list, o->stop);
+  else
+    ev_send(o->list, o->stop);
+}
+
 void
-rt_prune_sources(void)
+rt_prune_sources(void *data)
 {
-  HASH_WALK_FILTER(src_hash, next, src, sp)
+  struct rte_owner *o = data;
+
+  HASH_WALK_FILTER(o->hash, next, src, sp)
   {
-    if (src->uc == 0)
+    u64 uc;
+    while ((uc = atomic_load_explicit(&src->uc, memory_order_acquire)) >> RTE_SRC_PU_SHIFT)
+      ;
+
+    if (uc == 0)
     {
-      HASH_DO_REMOVE(src_hash, RSH, sp);
+      o->uc--;
+
+      HASH_DO_REMOVE(o->hash, RSH, sp);
+
+      SRC_LOCK;
       idm_free(&src_ids, src->global_id);
       sl_free(rte_src_slab, src);
+      SRC_UNLOCK;
     }
   }
   HASH_WALK_FILTER_END;
 
-  HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool);
+  SRC_LOCK;
+  HASH_MAY_RESIZE_DOWN(o->hash, RSH, src_pool);
+
+  if (o->stop && !o->uc)
+  {
+    rfree(o->prune);
+    SRC_UNLOCK;
+
+    if (config->table_debug)
+      log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name);
+
+    rt_done_sources(o);
+  }
+  else
+    SRC_UNLOCK;
+}
+
+void
+rt_init_sources(struct rte_owner *o, const char *name, event_list *list)
+{
+  SRC_LOCK;
+  HASH_INIT(o->hash, src_pool, RSH_INIT_ORDER);
+  o->hash_key = random_u32();
+  o->uc = 0;
+  o->name = name;
+  o->prune = ev_new_init(src_pool, rt_prune_sources, o);
+  o->stop = NULL;
+  o->list = list;
+  SRC_UNLOCK;
 }
 
+void
+rt_destroy_sources(struct rte_owner *o, event *done)
+{
+  o->stop = done;
+
+  if (!o->uc)
+  {
+    if (config->table_debug)
+      log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name);
+
+    SRC_LOCK;
+    rfree(o->prune);
+    SRC_UNLOCK;
+
+    rt_done_sources(o);
+  }
+  else
+    if (config->table_debug)
+      log(L_TRACE "Source owner %s destroy requested. Remaining %u rte_src's to prune.", o->name, o->uc);
+}
 
 /*
  *	Multipath Next Hop
@@ -541,8 +627,8 @@ ea_walk(struct ea_walk_state *s, uint id, uint max)
  * by calling ea_find() to find the attribute, extracting its value or returning
  * a provided default if no such attribute is present.
  */
-int
-ea_get_int(ea_list *e, unsigned id, int def)
+uintptr_t
+ea_get_int(ea_list *e, unsigned id, uintptr_t def)
 {
   eattr *a = ea_find(e, id);
   if (!a)
@@ -1081,21 +1167,28 @@ ea_append(ea_list *to, ea_list *what)
  *	rta's
  */
 
-static uint rta_cache_count;
-static uint rta_cache_size = 32;
-static uint rta_cache_limit;
-static uint rta_cache_mask;
-static rta **rta_hash_table;
+static DOMAIN(attrs) attrs_domain;
 
-static void
-rta_alloc_hash(void)
+#define RTA_LOCK	LOCK_DOMAIN(attrs, attrs_domain)
+#define RTA_UNLOCK	UNLOCK_DOMAIN(attrs, attrs_domain)
+
+struct rta_cache {
+  u32 count;
+  u32 size;
+  u32 limit;
+  u32 mask;
+  rta * _Atomic table[0];
+} * _Atomic rta_cache;
+// rta_aux, rta_cache = { .size = ATOMIC_VAR_INIT(32), };
+
+static struct rta_cache *
+rta_alloc_hash(u32 size)
 {
-  rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size);
-  if (rta_cache_size < 32768)
-    rta_cache_limit = rta_cache_size * 2;
-  else
-    rta_cache_limit = ~0;
-  rta_cache_mask = rta_cache_size - 1;
+  struct rta_cache *c = mb_allocz(rta_pool, sizeof(struct rta_cache) + sizeof(rta * _Atomic) * size);
+  c->size = size;
+  c->limit = (size >> 20) ? (~0U) : (size * 2);
+  c->mask = size - 1;
+  return c;
 }
 
 static inline uint
@@ -1104,13 +1197,14 @@ rta_hash(rta *a)
   u64 h;
   mem_hash_init(&h);
 #define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f));
-  MIX(src);
+#define BMIX(f) mem_hash_mix_num(&h, a->f);
   MIX(hostentry);
   MIX(from);
   MIX(igp_metric);
-  MIX(source);
-  MIX(scope);
-  MIX(dest);
+  BMIX(source);
+  BMIX(scope);
+  BMIX(dest);
+  MIX(pref);
 #undef MIX
 
   return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs);
@@ -1119,8 +1213,7 @@ rta_hash(rta *a)
 static inline int
 rta_same(rta *x, rta *y)
 {
-  return (x->src == y->src &&
-	  x->source == y->source &&
+  return (x->source == y->source &&
	  x->scope == y->scope &&
	  x->dest == y->dest &&
	  x->igp_metric == y->igp_metric &&
@@ -1149,34 +1242,88 @@ rta_copy(rta *o)
 }
 
 static inline void
-rta_insert(rta *r)
+rta_insert(rta *r, struct rta_cache *c)
 {
-  uint h = r->hash_key & rta_cache_mask;
-  r->next = rta_hash_table[h];
-  if (r->next)
-    r->next->pprev = &r->next;
-  r->pprev = &rta_hash_table[h];
-  rta_hash_table[h] = r;
+  uint h = r->hash_key & c->mask;
+  rta *next = atomic_load_explicit(&c->table[h], memory_order_relaxed);
+
+  atomic_store_explicit(&r->next, next, memory_order_relaxed);
+  r->pprev = &c->table[h];
+
+  if (next)
+    next->pprev = &r->next;
+
+  /* This store MUST be the last and MUST have release order for thread-safety */
+  atomic_store_explicit(&c->table[h], r, memory_order_release);
 }
 
 static void
-rta_rehash(void)
+rta_rehash(struct rta_cache *c)
 {
-  uint ohs = rta_cache_size;
-  uint h;
-  rta *r, *n;
-  rta **oht = rta_hash_table;
-
-  rta_cache_size = 2*rta_cache_size;
-  DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size);
-  rta_alloc_hash();
-  for(h=0; h<ohs; h++)
-    for(r=oht[h]; r; r=n)
+  u32 os = c->size;
+
+  struct rta_cache *nc = rta_alloc_hash(os * 2);
+  nc->count = c->count;
+
+  /* First we simply copy every chain to both new locations */
+  for (u32 h = 0; h < os; h++)
+  {
+    rta *r = atomic_load_explicit(&c->table[h], memory_order_relaxed);
+    atomic_store_explicit(&nc->table[h], r, memory_order_relaxed);
+    atomic_store_explicit(&nc->table[h + os], r, memory_order_relaxed);
+  }
+
+  /* Then we exchange the hashes; release semantics forces the previous code to be already done */
+  atomic_store_explicit(&rta_cache, nc, memory_order_release);
+
+  /* And now we pass through both chains and filter them */
+  for (u32 h = 0; h < c->size; h++)
+  {
+    rta * _Atomic * ap = &nc->table[h];
+    rta * _Atomic * bp = &nc->table[h + os];
+
+    rta *r = atomic_load_explicit(ap, memory_order_relaxed);
+    ASSERT_DIE(r == atomic_load_explicit(bp, memory_order_relaxed));
+
+    while (r)
+    {
+      if (r->hash_key & os)
       {
-	n = r->next;
-	rta_insert(r);
+	r->pprev = bp;
+	atomic_store_explicit(bp, r, memory_order_release);
+	bp = &r->next;
       }
-  mb_free(oht);
+      else
+      {
+	r->pprev = ap;
+	atomic_store_explicit(ap, r, memory_order_release);
+	ap = &r->next;
+      }
+
+      r = atomic_load_explicit(&r->next, memory_order_acquire);
+    }
+
+    atomic_store_explicit(ap, NULL, memory_order_release);
+    atomic_store_explicit(bp, NULL, memory_order_release);
+  }
+
+  synchronize_rcu();
+  mb_free(c);
+}
+
+static rta *
+rta_find(rta *o, u32 h, struct rta_cache *c)
+{
+  rta *r = NULL;
+
+  for (r = atomic_load_explicit(&c->table[h & c->mask], memory_order_acquire); r; r = atomic_load_explicit(&r->next, memory_order_acquire))
+    if (r->hash_key == h && rta_same(r, o))
+    {
+      atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel);
+      return r;
+    }
+
+  return NULL;
 }
 
 /**
@@ -1198,43 +1345,89 @@ rta_lookup(rta *o)
   rta *r;
   uint h;
 
-  ASSERT(!(o->aflags & RTAF_CACHED));
+  ASSERT(!o->cached);
   if (o->eattrs)
     ea_normalize(o->eattrs);
 
   h = rta_hash(o);
-  for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next)
-    if (r->hash_key == h && rta_same(r, o))
-      return rta_clone(r);
+
+  /* Lockless lookup */
+  rcu_read_lock();
+  r = rta_find(o, h, atomic_load_explicit(&rta_cache, memory_order_acquire));
+  rcu_read_unlock();
+
+  if (r)
+    return r;
+
+  RTA_LOCK;
+
+  /* Locked lookup to avoid duplicates if possible */
+  struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
+  r = rta_find(o, h, c);
+  if (r)
+  {
+    RTA_UNLOCK;
+    return r;
+  }
 
+  /* Store the rta */
   r = rta_copy(o);
   r->hash_key = h;
-  r->aflags = RTAF_CACHED;
-  rt_lock_source(r->src);
+  r->cached = 1;
   rt_lock_hostentry(r->hostentry);
-  rta_insert(r);
+  rta_insert(r, c);
 
-  if (++rta_cache_count > rta_cache_limit)
-    rta_rehash();
+  if (++c->count > c->limit)
+    rta_rehash(c);
 
+  RTA_UNLOCK;
   return r;
 }
 
 void
 rta__free(rta *a)
 {
-  ASSERT(rta_cache_count && (a->aflags & RTAF_CACHED));
-  rta_cache_count--;
-  *a->pprev = a->next;
-  if (a->next)
-    a->next->pprev = a->pprev;
+  ASSERT(a->cached);
+
+  RTA_LOCK;
+  struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
+
+  if (atomic_load_explicit(&a->uc, memory_order_acquire))
+  {
+    /* Acquired inbetween */
+    RTA_UNLOCK;
+    return;
+  }
+
+  /* Relink the forward pointer */
+  rta *next = atomic_load_explicit(&a->next, memory_order_acquire);
+  atomic_store_explicit(a->pprev, next, memory_order_release);
+
+  /* Relink the backwards pointer */
+  if (next)
+    next->pprev = a->pprev;
+
+  /* Wait until nobody knows about us */
+  synchronize_rcu();
+
+  if (atomic_load_explicit(&a->uc, memory_order_acquire))
+  {
+    /* Acquired inbetween, relink back */
+    rta_insert(a, c);
+    RTA_UNLOCK;
+    return;
+  }
+
+  /* Cleared to free the memory */
   rt_unlock_hostentry(a->hostentry);
-  rt_unlock_source(a->src);
   if (a->nh.next)
     nexthop_free(a->nh.next);
   ea_free(a->eattrs);
-  a->aflags = 0;		/* Poison the entry */
+  a->cached = 0;
+  c->count--;
   sl_free(rta_slab(a), a);
+
+  RTA_UNLOCK;
 }
 
 rta *
@@ -1248,8 +1441,7 @@ rta_do_cow(rta *o, linpool *lp)
       memcpy(*nhn, nho, nexthop_size(nho));
       nhn = &((*nhn)->next);
     }
-  r->aflags = 0;
-  r->uc = 0;
+  rta_uncache(r);
   return r;
 }
 
@@ -1260,22 +1452,22 @@ rta_do_cow(rta *o, linpool *lp)
  * This function takes a &rta and dumps its contents to the debug output.
 */
 void
-rta_dump(rta *a)
+rta_dump(const rta *a)
 {
-  static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE",
+  static char *rts[] = { "", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE",
			 "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP",
			 "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1",
			 "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" };
   static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" };
 
-  debug("p=%s uc=%d %s %s%s h=%04x",
-	a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope),
+  debug("pref=%d uc=%d %s %s%s h=%04x",
+	a->pref, a->uc, rts[a->source], ip_scope_text(a->scope),
	rtd[a->dest], a->hash_key);
-  if (!(a->aflags & RTAF_CACHED))
+  if (!a->cached)
     debug(" !CACHED");
   debug(" <-%I", a->from);
   if (a->dest == RTD_UNICAST)
-    for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
+    for (const struct nexthop *nh = &(a->nh); nh; nh = nh->next)
       {
	if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw);
	if (nh->labels) debug(" L %d", nh->label[0]);
@@ -1302,19 +1494,27 @@ rta_dump_all(void)
   rta *a;
   uint h;
 
-  debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit);
-  for(h=0; h<rta_cache_size; h++)
-    for(a=rta_hash_table[h]; a; a=a->next)
+  RTA_LOCK;
+
+  struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
+
+  debug("Route attribute cache (%d entries, rehash at %d):\n", c->count, c->limit);
+  for(h=0; h<c->size; h++)
+    for(a = atomic_load_explicit(&c->table[h], memory_order_acquire);
+	a;
+	a = atomic_load_explicit(&a->next, memory_order_acquire))
       {
	debug("%p ", a);
	rta_dump(a);
	debug("\n");
       }
   debug("\n");
+
+  RTA_UNLOCK;
 }
 
 void
-rta_show(struct cli *c, rta *a)
+rta_show(struct cli *c, const rta *a)
 {
   cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope));
@@ -1332,7 +1532,9 @@ rta_show(struct cli *c, rta *a)
 void
 rta_init(void)
 {
-  rta_pool = rp_new(&root_pool, "Attributes");
+  attrs_domain = DOMAIN_NEW(attrs, "Attributes");
+
+  rta_pool = rp_new(&root_pool, &main_birdloop, "Attributes");
 
   rta_slab_[0] = sl_new(rta_pool, sizeof(rta));
   rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32));
@@ -1344,7 +1546,7 @@ rta_init(void)
   nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2);
   nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
 
-  rta_alloc_hash();
+  atomic_store_explicit(&rta_cache, rta_alloc_hash(32), memory_order_relaxed);
   rte_src_init();
 }
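
Note on the pattern above: rta_lookup() now does a lockless read under rcu_read_lock() and falls back to a locked lookup-and-insert only on a miss, with rta_insert() publishing the new entry as its last, release-ordered store. The following sketch restates just that lookup/insert protocol with plain C11 atomics and a pthread mutex instead of BIRD's RTA_LOCK and lib/rcu.h wrappers; every name in it (cache_node, cache_find, cache_get, TABLE_SIZE) is a hypothetical stand-in, and freeing and resizing, the parts for which the patch needs synchronize_rcu(), are deliberately left out.

/* Sketch only: lockless lookup with locked insert, mirroring rta_lookup()/rta_insert(). */
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

struct cache_node {
  uint32_t key;
  _Atomic uint64_t uc;                  /* use count, like rta->uc */
  struct cache_node *_Atomic next;      /* collision chain */
};

#define TABLE_SIZE 256
static struct cache_node *_Atomic table[TABLE_SIZE];
static pthread_mutex_t cache_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Reader side: walk the chain without the lock. The acquire loads pair with
 * the final release store in cache_get(), so a published node is seen fully built. */
static struct cache_node *
cache_find(uint32_t key)
{
  struct cache_node *n = atomic_load_explicit(&table[key % TABLE_SIZE], memory_order_acquire);
  while (n)
  {
    if (n->key == key)
    {
      atomic_fetch_add_explicit(&n->uc, 1, memory_order_acq_rel);
      return n;
    }
    n = atomic_load_explicit(&n->next, memory_order_acquire);
  }
  return NULL;
}

/* Writer side: retry the lookup under the lock (another writer may have won
 * the race), then link the node and publish it last, with release order.
 * If an equal node already exists, the caller keeps ownership of 'fresh'. */
static struct cache_node *
cache_get(struct cache_node *fresh)
{
  struct cache_node *n = cache_find(fresh->key);
  if (n)
    return n;

  pthread_mutex_lock(&cache_mutex);
  if (!(n = cache_find(fresh->key)))
  {
    uint32_t h = fresh->key % TABLE_SIZE;
    atomic_store_explicit(&fresh->uc, 1, memory_order_relaxed);
    atomic_store_explicit(&fresh->next,
                          atomic_load_explicit(&table[h], memory_order_relaxed),
                          memory_order_relaxed);
    /* Must come last: this store makes the node visible to lockless readers. */
    atomic_store_explicit(&table[h], fresh, memory_order_release);
    n = fresh;
  }
  pthread_mutex_unlock(&cache_mutex);
  return n;
}

The point of the release store in cache_get(), as of the one called out in rta_insert() above, is that a reader that observes the new bucket head is guaranteed to also observe the node's key and next fields already filled in.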