Diffstat (limited to 'nest/rt-attr.c')
-rw-r--r-- | nest/rt-attr.c | 1152 |
1 file changed, 708 insertions, 444 deletions
diff --git a/nest/rt-attr.c b/nest/rt-attr.c index d793c72e..761ec0d9 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -45,11 +45,11 @@ */ #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "nest/cli.h" -#include "nest/attrs.h" +#include "lib/attrs.h" #include "lib/alloca.h" #include "lib/hash.h" #include "lib/idm.h" @@ -57,9 +57,25 @@ #include "lib/string.h" #include <stddef.h> +#include <stdlib.h> const adata null_adata; /* adata of length 0 */ +struct ea_class ea_gen_igp_metric = { + .name = "igp_metric", + .type = T_INT, +}; + +struct ea_class ea_gen_preference = { + .name = "preference", + .type = T_INT, +}; + +struct ea_class ea_gen_from = { + .name = "from", + .type = T_IP, +}; + const char * const rta_src_names[RTS_MAX] = { [RTS_STATIC] = "static", [RTS_INHERIT] = "inherit", @@ -77,6 +93,71 @@ const char * const rta_src_names[RTS_MAX] = { [RTS_RPKI] = "RPKI", }; +static void +ea_gen_source_format(const eattr *a, byte *buf, uint size) +{ + if ((a->u.data >= RTS_MAX) || !rta_src_names[a->u.data]) + bsnprintf(buf, size, "unknown"); + else + bsnprintf(buf, size, "%s", rta_src_names[a->u.data]); +} + +struct ea_class ea_gen_source = { + .name = "source", + .type = T_ENUM_RTS, + .readonly = 1, + .format = ea_gen_source_format, +}; + +struct ea_class ea_gen_nexthop = { + .name = "nexthop", + .type = T_NEXTHOP_LIST, +}; + +/* + * ea_set_hostentry() acquires a hostentry from the hostcache. + * A new hostentry has zero use count. A cached rta locks its + * hostentry (increases its use count); an uncached rta does not lock it. + * A hostentry with zero use count is removed asynchronously + * during the host cache update, therefore it is safe to hold + * such a hostentry temporarily as long as you hold the table lock. + * + * There is no need to hold a lock for the hostentry->dep table, because that + * table contains routes responsible for that hostentry, and is therefore + * non-empty whenever the given hostentry has a non-zero use count. If the + * hostentry has zero use count, the entry is removed before dep is referenced. + * + * The protocol responsible for routes with recursive next hops should hold a + * lock for the 'source' table governing those routes (argument tab), + * because its routes reference hostentries related to the governing table. + * When all such routes are + * removed, their rtas are removed immediately, reaching zero use count. The + * 'source' table lock can then be released immediately, although hostentries + * may still exist - they will be freed together with the 'source' table.
+ */ + + static void +ea_gen_hostentry_stored(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc++; +} + +static void +ea_gen_hostentry_freed(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc--; +} + +struct ea_class ea_gen_hostentry = { + .name = "hostentry", + .type = T_HOSTENTRY, + .readonly = 1, + .stored = ea_gen_hostentry_stored, + .freed = ea_gen_hostentry_freed, +}; + const char * rta_dest_names[RTD_MAX] = { [RTD_NONE] = "", [RTD_UNICAST] = "unicast", @@ -85,10 +166,26 @@ const char * rta_dest_names[RTD_MAX] = { [RTD_PROHIBIT] = "prohibited", }; +struct ea_class ea_gen_flowspec_valid = { + .name = "flowspec_valid", + .type = T_ENUM_FLOWSPEC_VALID, + .readonly = 1, +}; + +const char * flowspec_valid_names[FLOWSPEC__MAX] = { + [FLOWSPEC_UNKNOWN] = "unknown", + [FLOWSPEC_VALID] = "", + [FLOWSPEC_INVALID] = "invalid", +}; + +DOMAIN(attrs) attrs_domain; + pool *rta_pool; -static slab *rta_slab_[4]; -static slab *nexthop_slab_[4]; +/* Assuming page size of 4096, these are magic values for slab allocation */ +static const uint ea_slab_sizes[] = { 56, 112, 168, 288, 448, 800, 1344 }; +static slab *ea_slab[ARRAY_SIZE(ea_slab_sizes)]; + static slab *rte_src_slab; static struct idm src_ids; @@ -96,121 +193,176 @@ static struct idm src_ids; /* rte source hash */ -#define RSH_KEY(n) n->proto, n->private_id +#define RSH_KEY(n) n->private_id #define RSH_NEXT(n) n->next -#define RSH_EQ(p1,n1,p2,n2) p1 == p2 && n1 == n2 -#define RSH_FN(p,n) p->hash_key ^ u32_hash(n) +#define RSH_EQ(n1,n2) n1 == n2 +#define RSH_FN(n) u32_hash(n) #define RSH_REHASH rte_src_rehash #define RSH_PARAMS /2, *2, 1, 1, 8, 20 -#define RSH_INIT_ORDER 6 - -static HASH(struct rte_src) src_hash; +#define RSH_INIT_ORDER 2 +static struct rte_src **rte_src_global; +static uint rte_src_global_max = SRC_ID_INIT_SIZE; static void rte_src_init(void) { rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src)); + rte_src_global = mb_allocz(rta_pool, sizeof(struct rte_src *) * rte_src_global_max); idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE); - - HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER); } - HASH_DEFINE_REHASH_FN(RSH, struct rte_src) -struct rte_src * -rt_find_source(struct proto *p, u32 id) +static struct rte_src * +rt_find_source(struct rte_owner *p, u32 id) { - return HASH_FIND(src_hash, RSH, p, id); + return HASH_FIND(p->hash, RSH, id); } struct rte_src * -rt_get_source(struct proto *p, u32 id) +rt_get_source_o(struct rte_owner *p, u32 id) { + if (p->stop) + bug("Stopping route owner asked for another source."); + struct rte_src *src = rt_find_source(p, id); if (src) + { + UNUSED u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel); return src; + } + RTA_LOCK; src = sl_allocz(rte_src_slab); - src->proto = p; + src->owner = p; src->private_id = id; src->global_id = idm_alloc(&src_ids); - src->uc = 0; - HASH_INSERT2(src_hash, RSH, rta_pool, src); + atomic_store_explicit(&src->uc, 1, memory_order_release); + p->uc++; + + HASH_INSERT2(p->hash, RSH, rta_pool, src); + if (config->table_debug) + log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now", + p->name, src->private_id, src->global_id, p->uc); + + if (src->global_id >= rte_src_global_max) + { + rte_src_global = mb_realloc(rte_src_global, sizeof(struct rte_src *) * (rte_src_global_max *= 2)); + memset(&rte_src_global[rte_src_global_max / 2], 0, + sizeof(struct rte_src *) * (rte_src_global_max / 2)); + } + + rte_src_global[src->global_id] 
= src; + RTA_UNLOCK; return src; } +struct rte_src * +rt_find_source_global(u32 id) +{ + if (id >= rte_src_global_max) + return NULL; + else + return rte_src_global[id]; +} + +static inline void +rt_done_sources(struct rte_owner *o) +{ + ev_send(o->list, o->stop); +} + void -rt_prune_sources(void) +rt_prune_sources(void *data) { - HASH_WALK_FILTER(src_hash, next, src, sp) + struct rte_owner *o = data; + + HASH_WALK_FILTER(o->hash, next, src, sp) { - if (src->uc == 0) + u64 uc; + while ((uc = atomic_load_explicit(&src->uc, memory_order_acquire)) >> RTE_SRC_PU_SHIFT) + synchronize_rcu(); + + if (uc == 0) { - HASH_DO_REMOVE(src_hash, RSH, sp); + o->uc--; + + HASH_DO_REMOVE(o->hash, RSH, sp); + + RTA_LOCK; + rte_src_global[src->global_id] = NULL; idm_free(&src_ids, src->global_id); sl_free(src); + RTA_UNLOCK; } } HASH_WALK_FILTER_END; - HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool); -} - + RTA_LOCK; + HASH_MAY_RESIZE_DOWN(o->hash, RSH, rta_pool); -/* - * Multipath Next Hop - */ - -static inline u32 -nexthop_hash(struct nexthop *x) -{ - u32 h = 0; - for (; x; x = x->next) + if (o->stop && !o->uc) { - h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9); + rfree(o->prune); + RTA_UNLOCK; - for (int i = 0; i < x->labels; i++) - h ^= x->label[i] ^ (h << 6) ^ (h >> 7); + if (config->table_debug) + log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name); + + rt_done_sources(o); } + else + RTA_UNLOCK; +} - return h; +void +rt_init_sources(struct rte_owner *o, const char *name, event_list *list) +{ + RTA_LOCK; + HASH_INIT(o->hash, rta_pool, RSH_INIT_ORDER); + o->hash_key = random_u32(); + o->uc = 0; + o->name = name; + o->prune = ev_new_init(rta_pool, rt_prune_sources, o); + o->stop = NULL; + o->list = list; + RTA_UNLOCK; } -int -nexthop__same(struct nexthop *x, struct nexthop *y) +void +rt_destroy_sources(struct rte_owner *o, event *done) { - for (; x && y; x = x->next, y = y->next) + o->stop = done; + + if (!o->uc) { - if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || - (x->flags != y->flags) || (x->weight != y->weight) || - (x->labels_orig != y->labels_orig) || (x->labels != y->labels)) - return 0; + if (config->table_debug) + log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name); - for (int i = 0; i < x->labels; i++) - if (x->label[i] != y->label[i]) - return 0; - } + RTA_LOCK; + rfree(o->prune); + RTA_UNLOCK; - return x == y; + rt_done_sources(o); + } + else + if (config->table_debug) + log(L_TRACE "Source owner %s destroy requested. Remaining %u rte_src's to prune.", o->name, o->uc); } +/* + * Multipath Next Hop + */ + static int nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) { int r; - - if (!x) - return 1; - - if (!y) - return -1; - /* Should we also compare flags ? 
*/ r = ((int) y->weight) - ((int) x->weight); @@ -235,23 +387,16 @@ nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) return ((int) x->iface->index) - ((int) y->iface->index); } -static inline struct nexthop * -nexthop_copy_node(const struct nexthop *src, linpool *lp) +static int +nexthop_compare_qsort(const void *x, const void *y) { - struct nexthop *n = lp_alloc(lp, nexthop_size(src)); - - memcpy(n, src, nexthop_size(src)); - n->next = NULL; - - return n; + return nexthop_compare_node( *(const struct nexthop **) x, *(const struct nexthop **) y ); } /** * nexthop_merge - merge nexthop lists * @x: list 1 * @y: list 2 - * @rx: reusability of list @x - * @ry: reusability of list @y * @max: max number of nexthops * @lp: linpool for allocating nexthops * @@ -268,138 +413,229 @@ nexthop_copy_node(const struct nexthop *src, linpool *lp) * resulting list is no longer needed. When reusability is not set, the * corresponding lists are not modified nor linked from the resulting list. */ -struct nexthop * -nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp) +struct nexthop_adata * +nexthop_merge(struct nexthop_adata *xin, struct nexthop_adata *yin, int max, linpool *lp) { - struct nexthop *root = NULL; - struct nexthop **n = &root; + uint outlen = ADATA_SIZE(xin->ad.length) + ADATA_SIZE(yin->ad.length); + struct nexthop_adata *out = lp_alloc(lp, outlen); + out->ad.length = outlen - sizeof (struct adata); + + struct nexthop *x = &xin->nh, *y = &yin->nh, *cur = &out->nh; + int xvalid, yvalid; - while ((x || y) && max--) + while (max--) { - int cmp = nexthop_compare_node(x, y); + xvalid = NEXTHOP_VALID(x, xin); + yvalid = NEXTHOP_VALID(y, yin); + + if (!xvalid && !yvalid) + break; + + ASSUME(NEXTHOP_VALID(cur, out)); + + int cmp = !xvalid ? 1 : !yvalid ? -1 : nexthop_compare_node(x, y); if (cmp < 0) { - ASSUME(x); - *n = rx ? x : nexthop_copy_node(x, lp); - x = x->next; + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); } else if (cmp > 0) { - ASSUME(y); - *n = ry ? y : nexthop_copy_node(y, lp); - y = y->next; + ASSUME(NEXTHOP_VALID(y, yin)); + memcpy(cur, y, nexthop_size(y)); + y = NEXTHOP_NEXT(y); } else { - ASSUME(x && y); - *n = rx ? x : (ry ? 
y : nexthop_copy_node(x, lp)); - x = x->next; - y = y->next; + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); + + ASSUME(NEXTHOP_VALID(y, yin)); + y = NEXTHOP_NEXT(y); } - n = &((*n)->next); + cur = NEXTHOP_NEXT(cur); } - *n = NULL; - return root; + out->ad.length = (void *) cur - (void *) out->ad.data; + + return out; } -void -nexthop_insert(struct nexthop **n, struct nexthop *x) +struct nexthop_adata * +nexthop_sort(struct nexthop_adata *nhad, linpool *lp) { - for (; *n; n = &((*n)->next)) - { - int cmp = nexthop_compare_node(*n, x); + /* Count the nexthops */ + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + cnt++; - if (cmp < 0) - continue; - else if (cmp > 0) - break; - else - return; - } + if (cnt <= 1) + return nhad; - x->next = *n; - *n = x; -} + /* Get pointers to them */ + struct nexthop **sptr = tmp_alloc(cnt * sizeof(struct nexthop *)); -struct nexthop * -nexthop_sort(struct nexthop *x) -{ - struct nexthop *s = NULL; + uint i = 0; + NEXTHOP_WALK(nh, nhad) + sptr[i++] = nh; + + /* Sort the pointers */ + qsort(sptr, cnt, sizeof(struct nexthop *), nexthop_compare_qsort); - /* Simple insert-sort */ - while (x) + /* Allocate the output */ + struct nexthop_adata *out = (struct nexthop_adata *) lp_alloc_adata(lp, nhad->ad.length); + struct nexthop *dest = &out->nh; + + /* Deduplicate nexthops while storing them */ + for (uint i = 0; i < cnt; i++) { - struct nexthop *n = x; - x = n->next; - n->next = NULL; + if (i && !nexthop_compare_node(sptr[i], sptr[i-1])) + continue; - nexthop_insert(&s, n); + memcpy(dest, sptr[i], NEXTHOP_SIZE(sptr[i])); + dest = NEXTHOP_NEXT(dest); } - return s; + out->ad.length = (void *) dest - (void *) out->ad.data; + return out; } int -nexthop_is_sorted(struct nexthop *x) +nexthop_is_sorted(struct nexthop_adata *nhad) { - for (; x && x->next; x = x->next) - if (nexthop_compare_node(x, x->next) >= 0) + struct nexthop *prev = NULL; + NEXTHOP_WALK(nh, nhad) + { + if (prev && (nexthop_compare_node(prev, nh) >= 0)) return 0; + prev = nh; + } + return 1; } -static inline slab * -nexthop_slab(struct nexthop *nh) +/* + * Extended Attributes + */ + +#define EA_CLASS_INITIAL_MAX 128 +static struct ea_class **ea_class_global = NULL; +static uint ea_class_max; +static struct idm ea_class_idm; + +/* Config parser lex register function */ +void ea_lex_register(struct ea_class *def); +void ea_lex_unregister(struct ea_class *def); + +static void +ea_class_free(struct ea_class *cl) { - return nexthop_slab_[MIN(nh->labels, 3)]; + /* No more ea class references. Unregister the attribute. 
*/ + idm_free(&ea_class_idm, cl->id); + ea_class_global[cl->id] = NULL; + if (!cl->hidden) + ea_lex_unregister(cl); } -static struct nexthop * -nexthop_copy(struct nexthop *o) +static void +ea_class_ref_free(resource *r) { - struct nexthop *first = NULL; - struct nexthop **last = &first; - - for (; o; o = o->next) - { - struct nexthop *n = sl_allocz(nexthop_slab(o)); - n->gw = o->gw; - n->iface = o->iface; - n->next = NULL; - n->flags = o->flags; - n->weight = o->weight; - n->labels_orig = o->labels_orig; - n->labels = o->labels; - for (int i=0; i<o->labels; i++) - n->label[i] = o->label[i]; - - *last = n; - last = &(n->next); - } + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + if (!--ref->class->uc) + ea_class_free(ref->class); +} - return first; +static void +ea_class_ref_dump(resource *r) +{ + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + debug("name \"%s\", type=%d\n", ref->class->name, ref->class->type); } +static struct resclass ea_class_ref_class = { + .name = "Attribute class reference", + .size = sizeof(struct ea_class_ref), + .free = ea_class_ref_free, + .dump = ea_class_ref_dump, + .lookup = NULL, + .memsize = NULL, +}; + static void -nexthop_free(struct nexthop *o) +ea_class_init(void) { - struct nexthop *n; + ASSERT_DIE(ea_class_global == NULL); - while (o) - { - n = o->next; - sl_free(o); - o = n; - } + idm_init(&ea_class_idm, rta_pool, EA_CLASS_INITIAL_MAX); + ea_class_global = mb_allocz(rta_pool, + sizeof(*ea_class_global) * (ea_class_max = EA_CLASS_INITIAL_MAX)); +} + +static struct ea_class_ref * +ea_ref_class(pool *p, struct ea_class *def) +{ + def->uc++; + struct ea_class_ref *ref = ralloc(p, &ea_class_ref_class); + ref->class = def; + return ref; } +static struct ea_class_ref * +ea_register(pool *p, struct ea_class *def) +{ + def->id = idm_alloc(&ea_class_idm); -/* - * Extended Attributes - */ + ASSERT_DIE(ea_class_global); + while (def->id >= ea_class_max) + ea_class_global = mb_realloc(ea_class_global, sizeof(*ea_class_global) * (ea_class_max *= 2)); + + ASSERT_DIE(def->id < ea_class_max); + ea_class_global[def->id] = def; + + if (!def->hidden) + ea_lex_register(def); + + return ea_ref_class(p, def); +} + +struct ea_class_ref * +ea_register_alloc(pool *p, struct ea_class cl) +{ + struct ea_class *clp = ea_class_find_by_name(cl.name); + if (clp && clp->type == cl.type) + return ea_ref_class(p, clp); + + uint namelen = strlen(cl.name) + 1; + + struct { + struct ea_class cl; + char name[0]; + } *cla = mb_alloc(rta_pool, sizeof(struct ea_class) + namelen); + cla->cl = cl; + memcpy(cla->name, cl.name, namelen); + cla->cl.name = cla->name; + + return ea_register(p, &cla->cl); +} + +void +ea_register_init(struct ea_class *clp) +{ + ASSERT_DIE(!ea_class_find_by_name(clp->name)); + ea_register(&root_pool, clp); +} + +struct ea_class * +ea_class_find_by_id(uint id) +{ + ASSERT_DIE(id < ea_class_max); + ASSERT_DIE(ea_class_global[id]); + return ea_class_global[id]; +} static inline eattr * ea__find(ea_list *e, unsigned id) @@ -444,7 +680,7 @@ ea__find(ea_list *e, unsigned id) * to its &eattr structure or %NULL if no such attribute exists. 
*/ eattr * -ea_find(ea_list *e, unsigned id) +ea_find_by_id(ea_list *e, unsigned id) { eattr *a = ea__find(e, id & EA_CODE_MASK); @@ -529,25 +765,6 @@ ea_walk(struct ea_walk_state *s, uint id, uint max) return NULL; } -/** - * ea_get_int - fetch an integer attribute - * @e: attribute list - * @id: attribute ID - * @def: default value - * - * This function is a shortcut for retrieving a value of an integer attribute - * by calling ea_find() to find the attribute, extracting its value or returning - * a provided default if no such attribute is present. - */ -uintptr_t -ea_get_int(ea_list *e, unsigned id, uintptr_t def) -{ - eattr *a = ea_find(e, id); - if (!a) - return def; - return a->u.data; -} - static inline void ea_do_sort(ea_list *e) { @@ -614,8 +831,8 @@ ea_do_prune(ea_list *e) s++; /* Now s0 is the most recent version, s[-1] the oldest one */ - /* Drop undefs */ - if (s0->undef) + /* Drop undefs unless this is a true overlay */ + if (s0->undef && (s[-1].undef || !e->next)) continue; /* Copy the newest version to destination */ @@ -645,21 +862,18 @@ ea_do_prune(ea_list *e) * If an attribute occurs multiple times in a single &ea_list, * ea_sort() leaves only the first (the only significant) occurrence. */ -void +static void ea_sort(ea_list *e) { - while (e) - { - if (!(e->flags & EALF_SORTED)) - { - ea_do_sort(e); - ea_do_prune(e); - e->flags |= EALF_SORTED; - } - if (e->count > 5) - e->flags |= EALF_BISECT; - e = e->next; - } + if (!(e->flags & EALF_SORTED)) + { + ea_do_sort(e); + ea_do_prune(e); + e->flags |= EALF_SORTED; + } + + if (e->count > 5) + e->flags |= EALF_BISECT; } /** @@ -669,8 +883,8 @@ ea_sort(ea_list *e) * This function calculates an upper bound of the size of * a given &ea_list after merging with ea_merge(). */ -unsigned -ea_scan(ea_list *e) +static unsigned +ea_scan(const ea_list *e, int overlay) { unsigned cnt = 0; @@ -678,6 +892,8 @@ ea_scan(ea_list *e) { cnt += e->count; e = e->next; + if (e && overlay && ea_is_cached(e)) + break; } return sizeof(ea_list) + sizeof(eattr)*cnt; } @@ -696,21 +912,36 @@ ea_scan(ea_list *e) * segments with ea_merge() and finally sort and prune the result * by calling ea_sort(). */ -void -ea_merge(ea_list *e, ea_list *t) +static void +ea_merge(ea_list *e, ea_list *t, int overlay) { eattr *d = t->attrs; t->flags = 0; t->count = 0; - t->next = NULL; + while (e) { memcpy(d, e->attrs, sizeof(eattr)*e->count); t->count += e->count; d += e->count; e = e->next; + + if (e && overlay && ea_is_cached(e)) + break; } + + t->next = e; +} + +ea_list * +ea_normalize(ea_list *e, int overlay) +{ + ea_list *t = tmp_alloc(ea_scan(e, overlay)); + ea_merge(e, t, overlay); + ea_sort(t); + + return t->count ? 
t : t->next; } /** @@ -728,7 +959,8 @@ ea_same(ea_list *x, ea_list *y) if (!x || !y) return x == y; - ASSERT(!x->next && !y->next); + if (x->next != y->next) + return 0; if (x->count != y->count) return 0; for(c=0; c<x->count; c++) @@ -748,33 +980,37 @@ ea_same(ea_list *x, ea_list *y) return 1; } -static inline ea_list * -ea_list_copy(ea_list *o) +uint +ea_list_size(ea_list *o) { - ea_list *n; - unsigned i, adpos, elen; + unsigned i, elen; - if (!o) - return NULL; - ASSERT(!o->next); - elen = adpos = sizeof(ea_list) + sizeof(eattr) * o->count; + ASSERT_DIE(o); + elen = BIRD_CPU_ALIGN(sizeof(ea_list) + sizeof(eattr) * o->count); for(i=0; i<o->count; i++) { eattr *a = &o->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) - elen += sizeof(struct adata) + a->u.ptr->length; + if (!a->undef && !(a->type & EAF_EMBEDDED)) + elen += ADATA_SIZE(a->u.ptr->length); } - n = mb_alloc(rta_pool, elen); + return elen; +} + +void +ea_list_copy(ea_list *n, ea_list *o, uint elen) +{ + uint adpos = sizeof(ea_list) + sizeof(eattr) * o->count; memcpy(n, o, adpos); - n->flags |= EALF_CACHED; - for(i=0; i<o->count; i++) + adpos = BIRD_CPU_ALIGN(adpos); + + for(uint i=0; i<o->count; i++) { eattr *a = &n->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) + if (!a->undef && !(a->type & EAF_EMBEDDED)) { - unsigned size = sizeof(struct adata) + a->u.ptr->length; + unsigned size = ADATA_SIZE(a->u.ptr->length); ASSERT_DIE(adpos + size <= elen); struct adata *d = ((void *) n) + adpos; @@ -784,30 +1020,58 @@ ea_list_copy(ea_list *o) adpos += size; } } + ASSERT_DIE(adpos == elen); - return n; } -static inline void -ea_free(ea_list *o) +static void +ea_list_ref(ea_list *l) { - if (o) + for(uint i=0; i<l->count; i++) { - ASSERT(!o->next); - mb_free(o); + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + if (a->undef) + continue; + + struct ea_class *cl = ea_class_global[a->id]; + ASSERT_DIE(cl && cl->uc); + + CALL(cl->stored, a); + cl->uc++; } + + if (l->next) + { + ASSERT_DIE(ea_is_cached(l->next)); + ea_clone(l->next); + } } -static int -get_generic_attr(const eattr *a, byte **buf, int buflen UNUSED) +static void ea_free_nested(ea_list *l); + +static void +ea_list_unref(ea_list *l) { - if (a->id == EA_GEN_IGP_METRIC) + for(uint i=0; i<l->count; i++) { - *buf += bsprintf(*buf, "igp_metric"); - return GA_NAME; + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + if (a->undef) + continue; + + struct ea_class *cl = ea_class_global[a->id]; + ASSERT_DIE(cl && cl->uc); + + CALL(cl->freed, a); + if (!--cl->uc) + ea_class_free(cl); } - return GA_UNKNOWN; + if (l->next) + ea_free_nested(l->next); } void @@ -860,41 +1124,90 @@ opaque_format(const struct adata *ad, byte *buf, uint size) } static inline void -ea_show_int_set(struct cli *c, const struct adata *ad, int way, byte *pos, byte *buf, byte *end) +ea_show_int_set(struct cli *c, const char *name, const struct adata *ad, int way, byte *buf) { - int i = int_set_format(ad, way, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = int_set_format(ad, way, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = int_set_format(ad, way, i, buf, end - buf - 1); + i = int_set_format(ad, way, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void -ea_show_ec_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_ec_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = ec_set_format(ad, 0, pos, end - 
pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = ec_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = ec_set_format(ad, i, buf, end - buf - 1); + i = ec_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void -ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_lc_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = lc_set_format(ad, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = lc_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = lc_set_format(ad, i, buf, end - buf - 1); + i = lc_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } +void +ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad) +{ + if (!NEXTHOP_IS_REACHABLE(nhad)) + return; + + NEXTHOP_WALK(nh, nhad) + { + char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; + char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; + char weight[16] = ""; + + if (nh->labels) + { + lsp += bsprintf(lsp, " mpls %d", nh->label[0]); + for (int i=1;i<nh->labels; i++) + lsp += bsprintf(lsp, "/%d", nh->label[i]); + } + *lsp = '\0'; + + if (!NEXTHOP_ONE(nhad)) + bsprintf(weight, " weight %d", nh->weight + 1); + + if (ipa_nonzero(nh->gw)) + if (nh->iface) + cli_printf(c, -1007, "\tvia %I on %s%s%s%s", + nh->gw, nh->iface->name, mpls, onlink, weight); + else + cli_printf(c, -1007, "\tvia %I", nh->gw); + else + cli_printf(c, -1007, "\tdev %s%s%s", + nh->iface->name, mpls, onlink, weight); + } +} + +void +ea_show_hostentry(const struct adata *ad, byte *buf, uint size) +{ + const struct hostentry_adata *had = (const struct hostentry_adata *) ad; + + if (ipa_nonzero(had->he->link) && !ipa_equal(had->he->link, had->he->addr)) + bsnprintf(buf, size, "via %I %I table %s", had->he->addr, had->he->link, had->he->tab->name); + else + bsnprintf(buf, size, "via %I table %s", had->he->addr, had->he->tab->name); +} + /** * ea_show - print an &eattr to CLI * @c: destination CLI @@ -906,84 +1219,80 @@ ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte * If the protocol defining the attribute provides its own * get_attr() hook, it's consulted first. */ -void +static void ea_show(struct cli *c, const eattr *e) { - struct protocol *p; - int status = GA_UNKNOWN; const struct adata *ad = (e->type & EAF_EMBEDDED) ? 
NULL : e->u.ptr; byte buf[CLI_MSG_SIZE]; byte *pos = buf, *end = buf + sizeof(buf); - if (EA_IS_CUSTOM(e->id)) - { - const char *name = ea_custom_name(e->id); - if (name) - { - pos += bsprintf(pos, "%s", name); - status = GA_NAME; - } - else - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); - } - else if (p = class_to_protocol[EA_PROTO(e->id)]) - { - pos += bsprintf(pos, "%s.", p->name); - if (p->get_attr) - status = p->get_attr(e, pos, end - pos); - pos += strlen(pos); - } - else if (EA_PROTO(e->id)) - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); - else - status = get_generic_attr(e, &pos, end - pos); + ASSERT_DIE(e->id < ea_class_max); - if (status < GA_NAME) - pos += bsprintf(pos, "%02x", EA_ID(e->id)); - if (status < GA_FULL) - { - *pos++ = ':'; - *pos++ = ' '; + struct ea_class *cls = ea_class_global[e->id]; + ASSERT_DIE(cls); + + if (e->undef || cls->hidden) + return; + else if (cls->format) + cls->format(e, buf, end - buf); + else + switch (e->type) + { + case T_INT: + if ((cls == &ea_gen_igp_metric) && e->u.data >= IGP_METRIC_UNKNOWN) + return; - if (e->undef) - bsprintf(pos, "undefined"); - else - switch (e->type & EAF_TYPE_MASK) - { - case EAF_TYPE_INT: bsprintf(pos, "%u", e->u.data); break; - case EAF_TYPE_OPAQUE: + case T_OPAQUE: opaque_format(ad, pos, end - pos); break; - case EAF_TYPE_IP_ADDRESS: + case T_IP: bsprintf(pos, "%I", *(ip_addr *) ad->data); break; - case EAF_TYPE_ROUTER_ID: + case T_QUAD: bsprintf(pos, "%R", e->u.data); break; - case EAF_TYPE_AS_PATH: + case T_PATH: as_path_format(ad, pos, end - pos); break; - case EAF_TYPE_BITFIELD: - bsprintf(pos, "%08x", e->u.data); - break; - case EAF_TYPE_INT_SET: - ea_show_int_set(c, ad, 1, pos, buf, end); + case T_CLIST: + ea_show_int_set(c, cls->name, ad, 1, buf); return; - case EAF_TYPE_EC_SET: - ea_show_ec_set(c, ad, pos, buf, end); + case T_ECLIST: + ea_show_ec_set(c, cls->name, ad, buf); return; - case EAF_TYPE_LC_SET: - ea_show_lc_set(c, ad, pos, buf, end); + case T_LCLIST: + ea_show_lc_set(c, cls->name, ad, buf); return; + case T_NEXTHOP_LIST: + ea_show_nexthop_list(c, (struct nexthop_adata *) e->u.ptr); + return; + case T_HOSTENTRY: + ea_show_hostentry(ad, pos, end - pos); + break; default: bsprintf(pos, "<type %02x>", e->type); - } - } + } - if (status != GA_HIDDEN) - cli_printf(c, -1012, "\t%s", buf); + cli_printf(c, -1012, "\t%s: %s", cls->name, buf); +} + +static void +nexthop_dump(const struct adata *ad) +{ + struct nexthop_adata *nhad = (struct nexthop_adata *) ad; + + debug(":"); + + NEXTHOP_WALK(nh, nhad) + { + if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); + if (nh->labels) debug(" L %d", nh->label[0]); + for (int i=1; i<nh->labels; i++) + debug("/%d", nh->label[i]); + debug(" [%s]", nh->iface ? nh->iface->name : "???"); + } } /** @@ -1005,19 +1314,26 @@ ea_dump(ea_list *e) } while (e) { - debug("[%c%c%c]", + struct ea_storage *s = ea_is_cached(e) ? ea_get_storage(e) : NULL; + debug("[%c%c%c] uc=%d h=%08x", (e->flags & EALF_SORTED) ? 'S' : 's', (e->flags & EALF_BISECT) ? 'B' : 'b', - (e->flags & EALF_CACHED) ? 'C' : 'c'); + (e->flags & EALF_CACHED) ? 'C' : 'c', + s ? s->uc : 0, s ? s->hash_key : 0); for(i=0; i<e->count; i++) { eattr *a = &e->attrs[i]; - debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags); - debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]); + debug(" %04x.%02x", a->id, a->flags); + debug("=%c", + "?iO?IRP???S??pE?" + "??L???N?????????" + "?o???r??????????" 
[a->type]); if (a->originated) debug("o"); if (a->type & EAF_EMBEDDED) debug(":%08x", a->u.data); + else if (a->id == ea_gen_nexthop.id) + nexthop_dump(a->u.ptr); else { int j, len = a->u.ptr->length; @@ -1047,10 +1363,13 @@ ea_hash(ea_list *e) if (e) /* Assuming chain of length 1 */ { + h ^= mem_hash(&e->next, sizeof(e->next)); for(i=0; i<e->count; i++) { struct eattr *a = &e->attrs[i]; h ^= a->id; h *= mul; + if (a->undef) + continue; if (a->type & EAF_EMBEDDED) h ^= a->u.data; else @@ -1094,12 +1413,12 @@ static uint rta_cache_count; static uint rta_cache_size = 32; static uint rta_cache_limit; static uint rta_cache_mask; -static rta **rta_hash_table; +static struct ea_storage **rta_hash_table; static void rta_alloc_hash(void) { - rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size); + rta_hash_table = mb_allocz(rta_pool, sizeof(struct ea_storage *) * rta_cache_size); if (rta_cache_size < 32768) rta_cache_limit = rta_cache_size * 2; else @@ -1107,64 +1426,14 @@ rta_alloc_hash(void) rta_cache_mask = rta_cache_size - 1; } -static inline uint -rta_hash(rta *a) -{ - u64 h; - mem_hash_init(&h); -#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f)); -#define BMIX(f) mem_hash_mix_num(&h, a->f); - MIX(hostentry); - MIX(from); - MIX(igp_metric); - BMIX(source); - BMIX(scope); - BMIX(dest); - MIX(pref); -#undef MIX - - return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs); -} - -static inline int -rta_same(rta *x, rta *y) -{ - return (x->source == y->source && - x->scope == y->scope && - x->dest == y->dest && - x->igp_metric == y->igp_metric && - ipa_equal(x->from, y->from) && - x->hostentry == y->hostentry && - nexthop_same(&(x->nh), &(y->nh)) && - ea_same(x->eattrs, y->eattrs)); -} - -static inline slab * -rta_slab(rta *a) -{ - return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels]; -} - -static rta * -rta_copy(rta *o) -{ - rta *r = sl_alloc(rta_slab(o)); - - memcpy(r, o, rta_size(o)); - r->uc = 1; - r->nh.next = nexthop_copy(o->nh.next); - r->eattrs = ea_list_copy(o->eattrs); - return r; -} - static inline void -rta_insert(rta *r) +rta_insert(struct ea_storage *r) { uint h = r->hash_key & rta_cache_mask; - r->next = rta_hash_table[h]; - if (r->next) - r->next->pprev = &r->next; - r->pprev = &rta_hash_table[h]; + r->next_hash = rta_hash_table[h]; + if (r->next_hash) + r->next_hash->pprev_hash = &r->next_hash; + r->pprev_hash = &rta_hash_table[h]; rta_hash_table[h] = r; } @@ -1173,8 +1442,8 @@ rta_rehash(void) { uint ohs = rta_cache_size; uint h; - rta *r, *n; - rta **oht = rta_hash_table; + struct ea_storage *r, *n; + struct ea_storage **oht = rta_hash_table; rta_cache_size = 2*rta_cache_size; DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size); @@ -1182,7 +1451,7 @@ rta_rehash(void) for(h=0; h<ohs; h++) for(r=oht[h]; r; r=n) { - n = r->next; + n = r->next_hash; rta_insert(r); } mb_free(oht); @@ -1201,100 +1470,89 @@ rta_rehash(void) * The extended attribute lists attached to the &rta are automatically * converted to the normalized form. 
*/ -rta * -rta_lookup(rta *o) +ea_list * +ea_lookup(ea_list *o, int overlay) { - rta *r; + struct ea_storage *r; uint h; - ASSERT(!o->cached); - if (o->eattrs) - ea_normalize(o->eattrs); + ASSERT(!ea_is_cached(o)); + o = ea_normalize(o, overlay); + h = ea_hash(o); + + RTA_LOCK; - h = rta_hash(o); - for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next) - if (r->hash_key == h && rta_same(r, o)) - return rta_clone(r); + for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next_hash) + if (r->hash_key == h && ea_same(r->l, o)) + { + atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel); + RTA_UNLOCK; + return r->l; + } - r = rta_copy(o); + uint elen = ea_list_size(o); + uint sz = elen + sizeof(struct ea_storage); + for (uint i=0; i<ARRAY_SIZE(ea_slab_sizes); i++) + if (sz <= ea_slab_sizes[i]) + { + r = sl_alloc(ea_slab[i]); + break; + } + + int huge = r ? 0 : EALF_HUGE; + if (huge) + r = mb_alloc(rta_pool, sz); + + ea_list_copy(r->l, o, elen); + ea_list_ref(r->l); + + r->l->flags |= EALF_CACHED | huge; r->hash_key = h; - r->cached = 1; - rt_lock_hostentry(r->hostentry); + r->uc = 1; + rta_insert(r); if (++rta_cache_count > rta_cache_limit) rta_rehash(); - return r; + RTA_UNLOCK; + return r->l; } -void -rta__free(rta *a) +static void +ea_free_locked(struct ea_storage *a) { - ASSERT(rta_cache_count && a->cached); + /* Somebody has cloned this rta in the meantime. This sometimes happens. */ + if (atomic_load_explicit(&a->uc, memory_order_acquire)) + return; + + ASSERT(rta_cache_count); rta_cache_count--; - *a->pprev = a->next; - if (a->next) - a->next->pprev = a->pprev; - rt_unlock_hostentry(a->hostentry); - if (a->nh.next) - nexthop_free(a->nh.next); - ea_free(a->eattrs); - a->cached = 0; - sl_free(a); + *a->pprev_hash = a->next_hash; + if (a->next_hash) + a->next_hash->pprev_hash = a->pprev_hash; + + ea_list_unref(a->l); + if (a->l->flags & EALF_HUGE) + mb_free(a); + else + sl_free(a); } -rta * -rta_do_cow(rta *o, linpool *lp) +static void +ea_free_nested(struct ea_list *l) { - rta *r = lp_alloc(lp, rta_size(o)); - memcpy(r, o, rta_size(o)); - for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next) - { - *nhn = lp_alloc(lp, nexthop_size(nho)); - memcpy(*nhn, nho, nexthop_size(nho)); - nhn = &((*nhn)->next); - } - r->cached = 0; - r->uc = 0; - return r; + struct ea_storage *r = ea_get_storage(l); + if (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel)) + ea_free_locked(r); } -/** - * rta_dump - dump route attributes - * @a: attribute structure to dump - * - * This function takes a &rta and dumps its contents to the debug output. - */ void -rta_dump(rta *a) +ea__free(struct ea_storage *a) { - static char *rts[] = { "", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE", - "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP", - "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1", - "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; - static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" }; - - debug("pref=%d uc=%d %s %s%s h=%04x", - a->pref, a->uc, rts[a->source], ip_scope_text(a->scope), - rtd[a->dest], a->hash_key); - if (!a->cached) - debug(" !CACHED"); - debug(" <-%I", a->from); - if (a->dest == RTD_UNICAST) - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) - { - if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); - if (nh->labels) debug(" L %d", nh->label[0]); - for (int i=1; i<nh->labels; i++) - debug("/%d", nh->label[i]); - debug(" [%s]", nh->iface ? 
nh->iface->name : "???"); - } - if (a->eattrs) - { - debug(" EA: "); - ea_dump(a->eattrs); - } + RTA_LOCK; + ea_free_locked(a); + RTA_UNLOCK; } /** @@ -1304,30 +1562,29 @@ * to the debug output. */ void -rta_dump_all(void) +ea_dump_all(void) { - rta *a; - uint h; + RTA_LOCK; debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit); - for(h=0; h<rta_cache_size; h++) - for(a=rta_hash_table[h]; a; a=a->next) + for (uint h=0; h < rta_cache_size; h++) + for (struct ea_storage *a = rta_hash_table[h]; a; a = a->next_hash) { debug("%p ", a); - rta_dump(a); + ea_dump(a->l); debug("\n"); } debug("\n"); + + RTA_UNLOCK; } void -rta_show(struct cli *c, rta *a) +ea_show_list(struct cli *c, ea_list *eal) { - cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope)); - - for(ea_list *eal = a->eattrs; eal; eal=eal->next) - for(int i=0; i<eal->count; i++) - ea_show(c, &eal->attrs[i]); + ea_list *n = ea_normalize(eal, 0); + for (int i = 0; i < n->count; i++) + ea_show(c, &n->attrs[i]); } /** @@ -1339,20 +1596,27 @@ void rta_init(void) { - rta_pool = rp_new(&root_pool, "Attributes"); + attrs_domain = DOMAIN_NEW(attrs, "Attributes"); - rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); - rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)); - rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2); - rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + rta_pool = rp_new(&root_pool, "Attributes"); - nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop)); - nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)); - nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); - nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + for (uint i=0; i<ARRAY_SIZE(ea_slab_sizes); i++) + ea_slab[i] = sl_new(rta_pool, ea_slab_sizes[i]); rta_alloc_hash(); rte_src_init(); + ea_class_init(); + + /* These attributes are required to be first for nice "show route" output */ + ea_register_init(&ea_gen_nexthop); + ea_register_init(&ea_gen_hostentry); + + /* Other generic route attributes */ + ea_register_init(&ea_gen_preference); + ea_register_init(&ea_gen_igp_metric); + ea_register_init(&ea_gen_from); + ea_register_init(&ea_gen_source); + ea_register_init(&ea_gen_flowspec_valid); }
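
The hunks above replace the single global src_hash with a per-owner hash and an asynchronous prune/stop protocol. A minimal usage sketch of the new API follows; the example_* wrappers are hypothetical, and the event list passed to rt_init_sources() is simply whatever list the owner wants its stop event delivered to — only rt_init_sources(), rt_get_source_o() and rt_destroy_sources() come from this patch:

#include "nest/rt.h"

/* Start: give the owner its own source hash and prune event */
static void
example_sources_start(struct rte_owner *o, const char *name, event_list *list)
{
  rt_init_sources(o, name, list);
}

/* Use: lookup-or-create; the returned source already holds one reference */
static struct rte_src *
example_sources_get(struct rte_owner *o, u32 private_id)
{
  return rt_get_source_o(o, private_id);
}

/* Stop: `done` is sent only after rt_prune_sources() drops the last rte_src */
static void
example_sources_stop(struct rte_owner *o, event *done)
{
  rt_destroy_sources(o, done);
}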
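
nexthop_merge() now consumes and produces contiguous nexthop_adata blocks rather than linked lists, which is why the old rx/ry reusability flags disappeared: the result is always freshly allocated from @lp and both inputs are left untouched. A sketch of a call site (hypothetical wrapper; both inputs must be sorted, as the compare-and-advance loop assumes):

/* Merge two nexthop lists, keeping at most 4 next hops in the result. */
static struct nexthop_adata *
example_merge(struct nexthop_adata *a, struct nexthop_adata *b, linpool *lp)
{
  if (!nexthop_is_sorted(a))
    a = nexthop_sort(a, lp);
  if (!nexthop_is_sorted(b))
    b = nexthop_sort(b, lp);

  /* Equal next hops are taken once, from the first list */
  return nexthop_merge(a, b, 4, lp);
}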
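
Attribute identity moves from protocol-scoped EA codes to globally registered struct ea_class descriptors, as the ea_gen_* definitions above show. A sketch of how a protocol module would declare its own class (the class itself is invented for illustration; the fields and ea_register_init() are from this patch):

static struct ea_class ea_example_tag = {
  .name = "example_tag",   /* also becomes the config/filter keyword */
  .type = T_INT,
};

static void
example_register(void)
{
  /* Asserts the name is not yet taken, allocates an ID from ea_class_idm
   * and registers the keyword with the config lexer (unless hidden). */
  ea_register_init(&ea_example_tag);
}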
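
The new stored/freed hooks let a class keep an external reference count in sync with the attribute cache, exactly as ea_gen_hostentry does above with he->uc. The same pattern for a hypothetical refcounted payload (struct example_obj and its uc field are assumptions):

static void
example_obj_stored(const eattr *ea)
{
  struct example_obj *o = (struct example_obj *) ea->u.ptr;
  o->uc++;                       /* invoked from ea_list_ref() on caching */
}

static void
example_obj_freed(const eattr *ea)
{
  struct example_obj *o = (struct example_obj *) ea->u.ptr;
  o->uc--;                       /* invoked from ea_list_unref() on release */
}

static struct ea_class ea_example_obj = {
  .name = "example_obj",
  .type = T_OPAQUE,
  .readonly = 1,
  .stored = example_obj_stored,
  .freed = example_obj_freed,
};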
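
rta_lookup() becomes ea_lookup(): the whole route attribute set, next hops included, is now a single cached ea_list. A sketch of the interning lifecycle (wrapper name hypothetical; ea_lookup(), ea_is_cached() and the overlay flag come from the patch):

static ea_list *
example_intern(ea_list *raw)
{
  /* overlay == 0 flattens the whole chain before hashing; overlay == 1
   * would stop at the first cached layer and keep it in result->next,
   * storing the update as a thin diff over the cached base list. */
  ea_list *c = ea_lookup(raw, 0);

  ASSERT(ea_is_cached(c));       /* cached, with one reference held */
  return c;
}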
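
Finally, the four fixed-size rta slabs give way to the ea_slab_sizes ladder: ea_lookup() picks the first slab whose object size covers sizeof(struct ea_storage) plus ea_list_size(o), and anything larger falls back to mb_alloc() with EALF_HUGE set, so ea_free_locked() knows how to return it. The selection logic in isolation (hypothetical helper mirroring the loop in ea_lookup()):

static slab *
example_pick_slab(uint sz)
{
  for (uint i = 0; i < ARRAY_SIZE(ea_slab_sizes); i++)
    if (sz <= ea_slab_sizes[i])
      return ea_slab[i];

  return NULL;                   /* no slab fits: mb_alloc() as EALF_HUGE */
}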