summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--filter/f-inst.c5
-rw-r--r--lib/locking.h1
-rw-r--r--lib/route.h116
-rw-r--r--nest/proto.c12
-rw-r--r--nest/protocol.h4
-rw-r--r--nest/rt-attr.c121
-rw-r--r--nest/rt-dev.c2
-rw-r--r--nest/rt-show.c6
-rw-r--r--nest/rt-table.c37
-rw-r--r--proto/babel/babel.c15
-rw-r--r--proto/bgp/attrs.c38
-rw-r--r--proto/bgp/bgp.c21
-rw-r--r--proto/bgp/bgp.h7
-rw-r--r--proto/bgp/packets.c5
-rw-r--r--proto/mrt/mrt.c4
-rw-r--r--proto/ospf/ospf.c13
-rw-r--r--proto/ospf/ospf.h2
-rw-r--r--proto/pipe/pipe.c2
-rw-r--r--proto/rip/rip.c25
-rw-r--r--proto/static/static.c27
-rw-r--r--sysdep/unix/krt.c9
21 files changed, 360 insertions, 112 deletions
diff --git a/filter/f-inst.c b/filter/f-inst.c
index fff93517..60042476 100644
--- a/filter/f-inst.c
+++ b/filter/f-inst.c
@@ -692,7 +692,7 @@
switch (sa.sa_code)
{
case SA_NET: RESULT(sa.type, net, fs->rte->net); break;
- case SA_PROTO: RESULT(sa.type, s, fs->rte->src->proto->name); break;
+ case SA_PROTO: RESULT(sa.type, s, fs->rte->src->owner->name); break;
default:
{
struct eattr *nhea = ea_find(*fs->eattrs, &ea_gen_nexthop);
@@ -771,7 +771,8 @@
struct iface *ifa = (ipa_is_link_local(ip) && nh_ea) ?
((struct nexthop_adata *) nh_ea->u.ptr)->nh.iface : NULL;
- neighbor *n = neigh_find(fs->rte->src->proto, ip, ifa, 0);
+ /* XXX this code supposes that every owner is a protocol XXX */
+ neighbor *n = neigh_find(SKIP_BACK(struct proto, sources, fs->rte->src->owner), ip, ifa, 0);
if (!n || (n->scope == SCOPE_HOST))
runtime( "Invalid gw address" );
diff --git a/lib/locking.h b/lib/locking.h
index a9a8aa9b..1df30063 100644
--- a/lib/locking.h
+++ b/lib/locking.h
@@ -16,6 +16,7 @@ struct lock_order {
struct domain_generic *the_bird;
struct domain_generic *proto;
struct domain_generic *rtable;
+ struct domain_generic *attrs;
struct domain_generic *resource;
};
diff --git a/lib/route.h b/lib/route.h
index 2d691215..97e2e053 100644
--- a/lib/route.h
+++ b/lib/route.h
@@ -10,12 +10,17 @@
#ifndef _BIRD_LIB_ROUTE_H_
#define _BIRD_LIB_ROUTE_H_
+#undef RT_SOURCE_DEBUG
+
#include "lib/type.h"
+#include "lib/rcu.h"
+#include "lib/hash.h"
+#include "lib/event.h"
struct network;
struct proto;
struct cli;
-
+struct rtable;
typedef struct rte {
struct ea_list *attrs; /* Attributes of this route */
@@ -43,19 +48,114 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED);
struct rte_src {
struct rte_src *next; /* Hash chain */
- struct proto *proto; /* Protocol the source is based on */
+ struct rte_owner *owner; /* Route source owner */
u32 private_id; /* Private ID, assigned by the protocol */
u32 global_id; /* Globally unique ID of the source */
- unsigned uc; /* Use count */
+ _Atomic u64 uc; /* Use count */
+};
+
+struct rte_owner_class {
+ void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */
+ int (*rte_better)(struct rte *, struct rte *);
+ int (*rte_mergable)(struct rte *, struct rte *);
+ u32 (*rte_igp_metric)(const rte *);
+};
+
+struct rte_owner {
+ struct rte_owner_class *class;
+ int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
+ HASH(struct rte_src) hash;
+ const char *name;
+ u32 hash_key;
+ u32 uc;
+ event_list *list;
+ event *prune;
+ event *stop;
};
+DEFINE_DOMAIN(attrs);
+extern DOMAIN(attrs) attrs_domain;
+
+#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain)
+#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain)
+
+#define RTE_SRC_PU_SHIFT 44
+#define RTE_SRC_IN_PROGRESS (1ULL << RTE_SRC_PU_SHIFT)
+
+/* Get a route source. This also locks the source, therefore the caller has to
+ * unlock the source after the route has been propagated. */
+struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id);
+#define rt_get_source(p, id) rt_get_source_o(&(p)->sources, (id))
-struct rte_src *rt_find_source(struct proto *p, u32 id);
-struct rte_src *rt_get_source(struct proto *p, u32 id);
struct rte_src *rt_find_source_global(u32 id);
-static inline void rt_lock_source(struct rte_src *src) { src->uc++; }
-static inline void rt_unlock_source(struct rte_src *src) { src->uc--; }
-void rt_prune_sources(void);
+
+#ifdef RT_SOURCE_DEBUG
+#define rt_lock_source _rt_lock_source_internal
+#define rt_unlock_source _rt_unlock_source_internal
+#endif
+
+static inline void rt_lock_source(struct rte_src *src)
+{
+ /* Locking a source is trivial; somebody already holds it so we just increase
+ * the use count. Nothing can be freed underneath our hands. */
+ u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel);
+ ASSERT_DIE(uc > 0);
+}
+
+static inline void rt_unlock_source(struct rte_src *src)
+{
+ /* Unlocking is tricky. We do it lockless so at the same time, the prune
+ * event may be running, therefore if the unlock gets us to zero, it must be
+ * the last thing in this routine, otherwise the prune routine may find the
+ * source's usecount zeroed, freeing it prematurely.
+ *
+ * The usecount is split into two parts:
+ * the top 20 bits are an in-progress indicator
+ * the bottom 44 bits keep the actual usecount.
+ *
+ * Therefore at most 1 million of writers can simultaneously unlock the same
+ * source, while at most ~17T different routes can reference it. Both limits
+ * are insanely high from the 2022 point of view. Let's suppose that when 17T
+ * routes or 1M writers get real, we get also 128bit atomic variables in the
+ * C norm. */
+
+ /* First, we push the in-progress indicator */
+ u64 uc = atomic_fetch_add_explicit(&src->uc, RTE_SRC_IN_PROGRESS, memory_order_acq_rel);
+
+ /* Then we split the indicator to its parts. Remember, we got the value before the operation happened. */
+ u64 pending = (uc >> RTE_SRC_PU_SHIFT) + 1;
+ uc &= RTE_SRC_IN_PROGRESS - 1;
+
+ /* We per-use the RCU critical section indicator to make the prune event wait
+ * until we finish here in the rare case we get preempted. */
+ rcu_read_lock();
+
+ /* Obviously, there can't be more pending unlocks than the usecount itself */
+ if (uc == pending)
+ /* If we're the last unlocker, schedule the owner's prune event */
+ ev_send(src->owner->list, src->owner->prune);
+ else
+ ASSERT_DIE(uc > pending);
+
+ /* And now, finally, simultaneously pop the in-progress indicator and the
+ * usecount, possibly allowing the source pruning routine to free this structure */
+ atomic_fetch_sub_explicit(&src->uc, RTE_SRC_IN_PROGRESS + 1, memory_order_acq_rel);
+
+ /* ... and to reduce the load a bit, the source pruning routine will better wait for
+ * RCU synchronization instead of a busy loop. */
+ rcu_read_unlock();
+}
+
+#ifdef RT_SOURCE_DEBUG
+#undef rt_lock_source
+#undef rt_unlock_source
+
+#define rt_lock_source(x) ( log(L_INFO "Lock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_lock_source_internal(x) )
+#define rt_unlock_source(x) ( log(L_INFO "Unlock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_unlock_source_internal(x) )
+#endif
+
+void rt_init_sources(struct rte_owner *, const char *name, event_list *list);
+void rt_destroy_sources(struct rte_owner *, event *);
/*
* Route Attributes
diff --git a/nest/proto.c b/nest/proto.c
index 1d480ba2..e64cef28 100644
--- a/nest/proto.c
+++ b/nest/proto.c
@@ -1963,10 +1963,17 @@ channel_reset_limit(struct channel *c, struct limit *l, int dir)
c->limit_active &= ~(1 << dir);
}
+static struct rte_owner_class default_rte_owner_class;
+
static inline void
proto_do_start(struct proto *p)
{
p->active = 1;
+
+ rt_init_sources(&p->sources, p->name, proto_event_list(p));
+ if (!p->sources.class)
+ p->sources.class = &default_rte_owner_class;
+
if (!p->cf->late_if_feed)
if_feed_baby(p);
}
@@ -1975,10 +1982,8 @@ static void
proto_do_up(struct proto *p)
{
if (!p->main_source)
- {
p->main_source = rt_get_source(p, 0);
- rt_lock_source(p->main_source);
- }
+ // Locked automaticaly
proto_start_channels(p);
@@ -2005,6 +2010,7 @@ proto_do_stop(struct proto *p)
}
proto_stop_channels(p);
+ rt_destroy_sources(&p->sources, p->event);
p->do_stop = 1;
proto_send_event(p);
diff --git a/nest/protocol.h b/nest/protocol.h
index 3c823ae1..709d6415 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -60,7 +60,6 @@ struct protocol {
int (*start)(struct proto *); /* Start the instance */
int (*shutdown)(struct proto *); /* Stop the instance */
void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */
- void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */
// int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */
void (*show_proto_info)(struct proto *); /* Show protocol info (for `show protocols all' command) */
void (*copy_config)(struct proto_config *, struct proto_config *); /* Copy config from given protocol instance */
@@ -128,6 +127,7 @@ struct proto {
list channels; /* List of channels to rtables (struct channel) */
struct channel *main_channel; /* Primary channel */
struct rte_src *main_source; /* Primary route source */
+ struct rte_owner sources; /* Route source owner structure */
struct iface *vrf; /* Related VRF instance, NULL if global */
const char *name; /* Name of this instance (== cf->name) */
@@ -342,7 +342,7 @@ void proto_notify_state(struct proto *p, unsigned state);
*/
static inline int proto_is_inactive(struct proto *p)
-{ return (p->active_channels == 0) && (p->active_loops == 0); }
+{ return (p->active_channels == 0) && (p->active_loops == 0) && (p->sources.uc == 0); }
/*
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index b31bc5cc..b3a4c7a1 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -178,6 +178,8 @@ const char * flowspec_valid_names[FLOWSPEC__MAX] = {
[FLOWSPEC_INVALID] = "invalid",
};
+DOMAIN(attrs) attrs_domain;
+
pool *rta_pool;
static slab *rte_src_slab;
@@ -187,16 +189,14 @@ static struct idm src_ids;
/* rte source hash */
-#define RSH_KEY(n) n->proto, n->private_id
+#define RSH_KEY(n) n->private_id
#define RSH_NEXT(n) n->next
-#define RSH_EQ(p1,n1,p2,n2) p1 == p2 && n1 == n2
-#define RSH_FN(p,n) p->hash_key ^ u32_hash(n)
+#define RSH_EQ(n1,n2) n1 == n2
+#define RSH_FN(n) u32_hash(n)
#define RSH_REHASH rte_src_rehash
#define RSH_PARAMS /2, *2, 1, 1, 8, 20
-#define RSH_INIT_ORDER 6
-
-static HASH(struct rte_src) src_hash;
+#define RSH_INIT_ORDER 2
static struct rte_src **rte_src_global;
static uint rte_src_global_max = SRC_ID_INIT_SIZE;
@@ -207,34 +207,44 @@ rte_src_init(void)
rte_src_global = mb_allocz(rta_pool, sizeof(struct rte_src *) * rte_src_global_max);
idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
-
- HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER);
}
-
HASH_DEFINE_REHASH_FN(RSH, struct rte_src)
-struct rte_src *
-rt_find_source(struct proto *p, u32 id)
+static struct rte_src *
+rt_find_source(struct rte_owner *p, u32 id)
{
- return HASH_FIND(src_hash, RSH, p, id);
+ return HASH_FIND(p->hash, RSH, id);
}
struct rte_src *
-rt_get_source(struct proto *p, u32 id)
+rt_get_source_o(struct rte_owner *p, u32 id)
{
+ if (p->stop)
+ bug("Stopping route owner asked for another source.");
+
struct rte_src *src = rt_find_source(p, id);
if (src)
+ {
+ UNUSED u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel);
return src;
+ }
+ RTA_LOCK;
src = sl_allocz(rte_src_slab);
- src->proto = p;
+ src->owner = p;
src->private_id = id;
src->global_id = idm_alloc(&src_ids);
- src->uc = 0;
- HASH_INSERT2(src_hash, RSH, rta_pool, src);
+ atomic_store_explicit(&src->uc, 1, memory_order_release);
+ p->uc++;
+
+ HASH_INSERT2(p->hash, RSH, rta_pool, src);
+ if (config->table_debug)
+ log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now",
+ p->name, src->private_id, src->global_id, p->uc);
+
if (src->global_id >= rte_src_global_max)
{
rte_src_global = mb_realloc(rte_src_global, sizeof(struct rte_src *) * (rte_src_global_max *= 2));
@@ -243,6 +253,7 @@ rt_get_source(struct proto *p, u32 id)
}
rte_src_global[src->global_id] = src;
+ RTA_UNLOCK;
return src;
}
@@ -256,23 +267,89 @@ rt_find_source_global(u32 id)
return rte_src_global[id];
}
+static inline void
+rt_done_sources(struct rte_owner *o)
+{
+ ev_send(o->list, o->stop);
+}
+
void
-rt_prune_sources(void)
+rt_prune_sources(void *data)
{
- HASH_WALK_FILTER(src_hash, next, src, sp)
+ struct rte_owner *o = data;
+
+ HASH_WALK_FILTER(o->hash, next, src, sp)
{
- if (src->uc == 0)
+ u64 uc;
+ while ((uc = atomic_load_explicit(&src->uc, memory_order_acquire)) >> RTE_SRC_PU_SHIFT)
+ synchronize_rcu();
+
+ if (uc == 0)
{
- HASH_DO_REMOVE(src_hash, RSH, sp);
+ o->uc--;
+
+ HASH_DO_REMOVE(o->hash, RSH, sp);
+
+ RTA_LOCK;
+ rte_src_global[src->global_id] = NULL;
idm_free(&src_ids, src->global_id);
sl_free(src);
+ RTA_UNLOCK;
}
}
HASH_WALK_FILTER_END;
- HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool);
+ RTA_LOCK;
+ HASH_MAY_RESIZE_DOWN(o->hash, RSH, rta_pool);
+
+ if (o->stop && !o->uc)
+ {
+ rfree(o->prune);
+ RTA_UNLOCK;
+
+ if (config->table_debug)
+ log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name);
+
+ rt_done_sources(o);
+ }
+ else
+ RTA_UNLOCK;
}
+void
+rt_init_sources(struct rte_owner *o, const char *name, event_list *list)
+{
+ RTA_LOCK;
+ HASH_INIT(o->hash, rta_pool, RSH_INIT_ORDER);
+ o->hash_key = random_u32();
+ o->uc = 0;
+ o->name = name;
+ o->prune = ev_new_init(rta_pool, rt_prune_sources, o);
+ o->stop = NULL;
+ o->list = list;
+ RTA_UNLOCK;
+}
+
+void
+rt_destroy_sources(struct rte_owner *o, event *done)
+{
+ o->stop = done;
+
+ if (!o->uc)
+ {
+ if (config->table_debug)
+ log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name);
+
+ RTA_LOCK;
+ rfree(o->prune);
+ RTA_UNLOCK;
+
+ rt_done_sources(o);
+ }
+ else
+ if (config->table_debug)
+ log(L_TRACE "Source owner %s destroy requested. Remaining %u rte_src's to prune.", o->name, o->uc);
+}
/*
* Multipath Next Hop
@@ -1466,6 +1543,8 @@ ea_show_list(struct cli *c, ea_list *eal)
void
rta_init(void)
{
+ attrs_domain = DOMAIN_NEW(attrs, "Attributes");
+
rta_pool = rp_new(&root_pool, "Attributes");
rta_alloc_hash();
diff --git a/nest/rt-dev.c b/nest/rt-dev.c
index 7f45985f..4199e17c 100644
--- a/nest/rt-dev.c
+++ b/nest/rt-dev.c
@@ -68,6 +68,7 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad)
/* Use iface ID as local source ID */
struct rte_src *src = rt_get_source(P, ad->iface->index);
rte_update(c, net, NULL, src);
+ rt_unlock_source(src);
}
else if (flags & IF_CHANGE_UP)
{
@@ -95,6 +96,7 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad)
};
rte_update(c, net, &e0, src);
+ rt_unlock_source(src);
}
}
diff --git a/nest/rt-show.c b/nest/rt-show.c
index b784bf83..17400029 100644
--- a/nest/rt-show.c
+++ b/nest/rt-show.c
@@ -58,7 +58,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary
if (d->verbose && !rta_is_cached(a) && a)
a = ea_normalize(a, 0);
- get_route_info = e->src->proto->proto->get_route_info;
+ get_route_info = e->src->owner->class ? e->src->owner->class->get_route_info : NULL;
if (get_route_info)
get_route_info(e, info);
else
@@ -74,7 +74,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary
cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia,
net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : had ? "recursive" : rta_dest_name(dest),
- e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info);
+ e->src->owner->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info);
if (d->verbose)
{
@@ -178,7 +178,7 @@ rt_show_net(struct rt_show_data *d, const net_addr *n, rte **feed, uint count)
}
}
- if (d->show_protocol && (d->show_protocol != e.src->proto))
+ if (d->show_protocol && (&d->show_protocol->sources != e.src->owner))
goto skip;
if (f_run(d->filter, &e, 0) > F_ACCEPT)
diff --git a/nest/rt-table.c b/nest/rt-table.c
index 15dbc371..b8f0e61d 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -673,16 +673,16 @@ rte_better(rte *new, rte *old)
return 1;
if (np < op)
return 0;
- if (new->src->proto->proto != old->src->proto->proto)
+ if (new->src->owner->class != old->src->owner->class)
{
/*
* If the user has configured protocol preferences, so that two different protocols
* have the same preference, try to break the tie by comparing addresses. Not too
* useful, but keeps the ordering of routes unambiguous.
*/
- return new->src->proto->proto > old->src->proto->proto;
+ return new->src->owner->class > old->src->owner->class;
}
- if (better = new->src->proto->rte_better)
+ if (better = new->src->owner->class->rte_better)
return better(new, old);
return 0;
}
@@ -698,10 +698,10 @@ rte_mergable(rte *pri, rte *sec)
if (rt_get_preference(pri) != rt_get_preference(sec))
return 0;
- if (pri->src->proto->proto != sec->src->proto->proto)
+ if (pri->src->owner->class != sec->src->owner->class)
return 0;
- if (mergable = pri->src->proto->rte_mergable)
+ if (mergable = pri->src->owner->class->rte_mergable)
return mergable(pri, sec);
return 0;
@@ -1596,10 +1596,10 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
{
if (!old->generation && !new->generation)
bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u",
- c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id);
+ c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id);
log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u",
- c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id);
+ c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id);
}
if (new && rte_same(old, &new_stored->rte))
@@ -1675,8 +1675,8 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
/* If routes are not sorted, find the best route and move it on
the first position. There are several optimized cases. */
- if (src->proto->rte_recalculate &&
- src->proto->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best))
+ if (src->owner->rte_recalculate &&
+ src->owner->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best))
goto do_recalculate;
if (new_stored && rte_better(&new_stored->rte, old_best))
@@ -2876,8 +2876,6 @@ again:
}
}
- rt_prune_sources();
-
uint flushed_channels = 0;
/* Close flushed channels */
@@ -3048,7 +3046,6 @@ rt_export_cleanup(rtable *tab)
done:;
struct rt_import_hook *ih; node *x;
- _Bool imports_stopped = 0;
WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n)
if (ih->import_state == TIS_WAITING)
if (!first || (first->seq >= ih->flush_seq))
@@ -3058,19 +3055,11 @@ done:;
rem_node(&ih->n);
mb_free(ih);
rt_unlock_table(tab);
- imports_stopped = 1;
}
if (tab->export_used)
ev_schedule(tab->rt_event);
- if (imports_stopped)
- {
- if (config->table_debug)
- log(L_TRACE "%s: Sources pruning routine requested", tab->name);
-
- rt_prune_sources();
- }
if (EMPTY_LIST(tab->exporter.pending) && tm_active(tab->exporter.export_timer))
tm_stop(tab->exporter.export_timer);
@@ -3554,8 +3543,8 @@ rt_next_hop_update_net(rtable *tab, net *n)
/* Call a pre-comparison hook */
/* Not really an efficient way to compute this */
- if (e->rte.src->proto->rte_recalculate)
- e->rte.src->proto->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte);
+ if (e->rte.src->owner->rte_recalculate)
+ e->rte.src->owner->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte);
updates[pos++] = (struct rte_multiupdate) {
.old = e,
@@ -4170,8 +4159,8 @@ rt_get_igp_metric(const rte *rt)
if (rt_get_source_attr(rt) == RTS_DEVICE)
return 0;
- if (rt->src->proto->rte_igp_metric)
- return rt->src->proto->rte_igp_metric(rt);
+ if (rt->src->owner->class->rte_igp_metric)
+ return rt->src->owner->class->rte_igp_metric(rt);
return IGP_METRIC_UNKNOWN;
}
diff --git a/proto/babel/babel.c b/proto/babel/babel.c
index 00b9aa79..4d024e3a 100644
--- a/proto/babel/babel.c
+++ b/proto/babel/babel.c
@@ -2259,7 +2259,7 @@ babel_kick_timer(struct babel_proto *p)
static int
babel_preexport(struct channel *C, struct rte *new)
{
- if (new->src->proto != C->proto)
+ if (new->src->owner != &C->proto->sources)
return 0;
/* Reject our own unreachable routes */
@@ -2289,7 +2289,7 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net,
uint rt_metric = ea_get_int(new->attrs, &ea_babel_metric, 0);
u64 rt_router_id = 0;
- if (new->src->proto == P)
+ if (new->src->owner == &P->sources)
{
rt_seqno = ea_get_int(new->attrs, &ea_babel_seqno, 0);
eattr *e = ea_find(new->attrs, &ea_babel_router_id);
@@ -2373,6 +2373,12 @@ babel_postconfig(struct proto_config *CF)
cf->ip6_channel = ip6 ?: ip6_sadr;
}
+static struct rte_owner_class babel_rte_owner_class = {
+ .get_route_info = babel_get_route_info,
+ .rte_better = babel_rte_better,
+ .rte_igp_metric = babel_rte_igp_metric,
+};
+
static struct proto *
babel_init(struct proto_config *CF)
{
@@ -2386,8 +2392,8 @@ babel_init(struct proto_config *CF)
P->if_notify = babel_if_notify;
P->rt_notify = babel_rt_notify;
P->preexport = babel_preexport;
- P->rte_better = babel_rte_better;
- P->rte_igp_metric = babel_rte_igp_metric;
+
+ P->sources.class = &babel_rte_owner_class;
return P;
}
@@ -2498,7 +2504,6 @@ struct protocol proto_babel = {
.start = babel_start,
.shutdown = babel_shutdown,
.reconfigure = babel_reconfigure,
- .get_route_info = babel_get_route_info,
};
void
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 2ff3f3f3..a1f5791a 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -1651,8 +1651,9 @@ bgp_init_prefix_table(struct bgp_channel *bc)
}
static struct bgp_prefix *
-bgp_get_prefix(struct bgp_pending_tx *c, const net_addr *net, u32 path_id, int add_path_tx)
+bgp_get_prefix(struct bgp_pending_tx *c, const net_addr *net, struct rte_src *src, int add_path_tx)
{
+ u32 path_id = src->global_id;
u32 path_id_hash = add_path_tx ? path_id : 0;
/* We must use a different hash function than the rtable */
u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash));
@@ -1670,6 +1671,7 @@ bgp_get_prefix(struct bgp_pending_tx *c, const net_addr *net, u32 path_id, int a
px->hash = hash;
px->path_id = path_id;
net_copy(px->net, net);
+ rt_lock_source(src);
HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
@@ -1737,6 +1739,8 @@ bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px)
{
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
+ rt_unlock_source(rt_find_source_global(px->path_id));
+
if (c->prefix_slab)
sl_free(px);
else
@@ -1777,9 +1781,11 @@ bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket
static void
bgp_pending_tx_rfree(resource *r)
{
- UNUSED struct bgp_pending_tx *ptx = SKIP_BACK(struct bgp_pending_tx, r, r);
+ struct bgp_pending_tx *ptx = SKIP_BACK(struct bgp_pending_tx, r, r);
- /* Do nothing for now. */
+ HASH_WALK(ptx->prefix_hash, next, n)
+ rt_unlock_source(rt_find_source_global(n->path_id));
+ HASH_WALK_END;
}
static void bgp_pending_tx_dump(resource *r UNUSED) { debug("\n"); }
@@ -1953,9 +1959,8 @@ bgp_setup_out_table(struct bgp_channel *c)
int
bgp_preexport(struct channel *C, rte *e)
{
- struct proto *SRC = e->src->proto;
struct bgp_proto *p = (struct bgp_proto *) C->proto;
- struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
+ struct bgp_proto *src = bgp_rte_proto(e);
/* Reject our routes */
if (src == p)
@@ -2024,8 +2029,7 @@ bgp_preexport(struct channel *C, rte *e)
static ea_list *
bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
{
- struct proto *SRC = e->src->proto;
- struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
+ struct bgp_proto *src = bgp_rte_proto(e);
struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
ea_list *attrs = attrs0;
eattr *a;
@@ -2143,7 +2147,7 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c
struct bgp_proto *p = (void *) P;
struct bgp_channel *c = (void *) C;
struct bgp_bucket *buck;
- u32 path;
+ struct rte_src *path;
if (new)
{
@@ -2155,12 +2159,12 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c
/* If attributes are invalid, we fail back to withdraw */
buck = attrs ? bgp_get_bucket(c->ptx, attrs) : bgp_get_withdraw_bucket(c->ptx);
- path = new->src->global_id;
+ path = new->src;
}
else
{
buck = bgp_get_withdraw_bucket(c->ptx);
- path = old->src->global_id;
+ path = old->src;
}
if (bgp_update_prefix(c, bgp_get_prefix(c->ptx, n, path, c->add_path_tx), buck))
@@ -2178,7 +2182,7 @@ bgp_get_neighbor(rte *r)
return as;
/* If AS_PATH is not defined, we treat rte as locally originated */
- struct bgp_proto *p = (void *) r->src->proto;
+ struct bgp_proto *p = bgp_rte_proto(r);
return p->cf->confederation ?: p->local_as;
}
@@ -2208,8 +2212,8 @@ rte_stale(rte *r)
int
bgp_rte_better(rte *new, rte *old)
{
- struct bgp_proto *new_bgp = (struct bgp_proto *) new->src->proto;
- struct bgp_proto *old_bgp = (struct bgp_proto *) old->src->proto;
+ struct bgp_proto *new_bgp = bgp_rte_proto(new);
+ struct bgp_proto *old_bgp = bgp_rte_proto(old);
eattr *x, *y;
u32 n, o;
@@ -2353,8 +2357,8 @@ bgp_rte_better(rte *new, rte *old)
int
bgp_rte_mergable(rte *pri, rte *sec)
{
- struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->src->proto;
- struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->src->proto;
+ struct bgp_proto *pri_bgp = bgp_rte_proto(pri);
+ struct bgp_proto *sec_bgp = bgp_rte_proto(sec);
eattr *x, *y;
u32 p, s;
@@ -2438,8 +2442,8 @@ same_group(rte *r, u32 lpref, u32 lasn)
static inline int
use_deterministic_med(struct rte_storage *r)
{
- struct proto *P = r->rte.src->proto;
- return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
+ struct bgp_proto *p = bgp_rte_proto(&r->rte);
+ return p && p->cf->deterministic_med;
}
int
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index e6d3f125..65673e37 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -518,6 +518,12 @@ bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
p->uncork_ev->data = NULL;
bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
+
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ if (c->ptx)
+ bgp_free_pending_tx(c);
+
ev_schedule(p->event);
}
@@ -1708,6 +1714,13 @@ done:
return p->p.proto_state;
}
+struct rte_owner_class bgp_rte_owner_class = {
+ .get_route_info = bgp_get_route_info,
+ .rte_better = bgp_rte_better,
+ .rte_mergable = bgp_rte_mergable,
+ .rte_igp_metric = bgp_rte_igp_metric,
+};
+
static struct proto *
bgp_init(struct proto_config *CF)
{
@@ -1721,10 +1734,9 @@ bgp_init(struct proto_config *CF)
P->reload_routes = bgp_reload_routes;
P->feed_begin = bgp_feed_begin;
P->feed_end = bgp_feed_end;
- P->rte_better = bgp_rte_better;
- P->rte_mergable = bgp_rte_mergable;
- P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
- P->rte_igp_metric = bgp_rte_igp_metric;
+
+ P->sources.class = &bgp_rte_owner_class;
+ P->sources.rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
p->cf = cf;
p->is_internal = (cf->local_as == cf->remote_as);
@@ -2635,7 +2647,6 @@ struct protocol proto_bgp = {
.reconfigure = bgp_reconfigure,
.copy_config = bgp_copy_config,
.get_status = bgp_get_status,
- .get_route_info = bgp_get_route_info,
.show_proto_info = bgp_show_proto_info
};
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index def7b102..fdd134f4 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -541,6 +541,7 @@ rte_resolvable(const rte *rt)
return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE);
}
+extern struct rte_owner_class bgp_rte_owner_class;
#ifdef LOCAL_DEBUG
#define BGP_FORCE_DEBUG 1
@@ -590,6 +591,12 @@ int bgp_preexport(struct channel *, struct rte *);
void bgp_get_route_info(struct rte *, byte *);
int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad);
+static inline struct bgp_proto *bgp_rte_proto(struct rte *rte)
+{
+ return (rte->src->owner->class == &bgp_rte_owner_class) ?
+ SKIP_BACK(struct bgp_proto, p.sources, rte->src->owner) : NULL;
+}
+
#define BGP_AIGP_METRIC 1
#define BGP_AIGP_MAX U64(0xffffffffffffffff)
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 57c00eb5..1f208432 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -1372,6 +1372,8 @@ bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, ea_lis
{
if (path_id != s->last_id)
{
+ rt_unlock_source(s->last_src);
+
s->last_src = rt_get_source(&s->proto->p, path_id);
s->last_id = path_id;
@@ -2449,6 +2451,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis
s->last_id = 0;
s->last_src = s->proto->p.main_source;
+ rt_lock_source(s->last_src);
/*
* IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
@@ -2475,6 +2478,8 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis
rta_free(s->cached_ea);
s->cached_ea = NULL;
+
+ rt_unlock_source(s->last_src);
}
static void
diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c
index 5ef4cd44..f4c09ab1 100644
--- a/proto/mrt/mrt.c
+++ b/proto/mrt/mrt.c
@@ -472,9 +472,9 @@ mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r)
#ifdef CONFIG_BGP
/* Find peer index */
- if (r->src->proto->proto == &proto_bgp)
+ struct bgp_proto *p = bgp_rte_proto(r);
+ if (p)
{
- struct bgp_proto *p = (void *) r->src->proto;
struct mrt_peer_entry *n =
HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->remote_ip);
diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c
index 9c25f0f0..4e29f960 100644
--- a/proto/ospf/ospf.c
+++ b/proto/ospf/ospf.c
@@ -377,8 +377,8 @@ ospf_init(struct proto_config *CF)
P->reload_routes = ospf_reload_routes;
P->feed_begin = ospf_feed_begin;
P->feed_end = ospf_feed_end;
- P->rte_better = ospf_rte_better;
- P->rte_igp_metric = ospf_rte_igp_metric;
+
+ P->sources.class = &ospf_rte_owner_class;
return P;
}
@@ -492,7 +492,7 @@ ospf_preexport(struct channel *C, rte *e)
struct ospf_area *oa = ospf_main_area(p);
/* Reject our own routes */
- if (e->src->proto == &p->p)
+ if (e->sender == C->in_req.hook)
return -1;
/* Do not export routes to stub areas */
@@ -1506,6 +1506,12 @@ ospf_sh_lsadb(struct lsadb_show_data *ld)
}
+struct rte_owner_class ospf_rte_owner_class = {
+ .get_route_info = ospf_get_route_info,
+ .rte_better = ospf_rte_better,
+ .rte_igp_metric = ospf_rte_igp_metric,
+};
+
struct protocol proto_ospf = {
.name = "OSPF",
.template = "ospf%d",
@@ -1519,7 +1525,6 @@ struct protocol proto_ospf = {
.shutdown = ospf_shutdown,
.reconfigure = ospf_reconfigure,
.get_status = ospf_get_status,
- .get_route_info = ospf_get_route_info
};
struct ea_class ea_ospf_metric1 = {
diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h
index 7bed5c85..3477ba5a 100644
--- a/proto/ospf/ospf.h
+++ b/proto/ospf/ospf.h
@@ -1002,6 +1002,8 @@ void ospf_sh_state(struct proto *P, int verbose, int reachable);
void ospf_sh_lsadb(struct lsadb_show_data *ld);
+extern struct rte_owner_class ospf_rte_owner_class;
+
/* iface.c */
void ospf_iface_chstate(struct ospf_iface *ifa, u8 state);
void ospf_iface_sm(struct ospf_iface *ifa, int event);
diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c
index 8af6de81..b3b50a0d 100644
--- a/proto/pipe/pipe.c
+++ b/proto/pipe/pipe.c
@@ -87,7 +87,7 @@ pipe_preexport(struct channel *C, rte *e)
{
log_rl(&p->rl_gen, L_ERR "Route overpiped (%u hops of %u configured in %s) in table %s: %N %s/%u:%u",
e->generation, max_generation, C->proto->name,
- C->table->name, e->net, e->src->proto->name, e->src->private_id, e->src->global_id);
+ C->table->name, e->net, e->src->owner->name, e->src->private_id, e->src->global_id);
return -1;
}
diff --git a/proto/rip/rip.c b/proto/rip/rip.c
index f5c01380..ab0e3f4b 100644
--- a/proto/rip/rip.c
+++ b/proto/rip/rip.c
@@ -377,7 +377,7 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, s
en->valid = RIP_ENTRY_VALID;
en->metric = rt_metric;
en->tag = rt_tag;
- en->from = (new->src->proto == P) ? rt_from : NULL;
+ en->from = (new->src->owner == &P->sources) ? rt_from : NULL;
eattr *nhea = ea_find(new->attrs, &ea_gen_nexthop);
if (nhea)
@@ -1112,11 +1112,20 @@ rip_reload_routes(struct channel *C)
rip_kick_timer(p);
}
+static struct rte_owner_class rip_rte_owner_class;
+
+static inline struct rip_proto *
+rip_rte_proto(struct rte *rte)
+{
+ return (rte->src->owner->class == &rip_rte_owner_class) ?
+ SKIP_BACK(struct rip_proto, p.sources, rte->src->owner) : NULL;
+}
+
static int
rip_rte_better(struct rte *new, struct rte *old)
{
ASSERT_DIE(new->src == old->src);
- struct rip_proto *p = (struct rip_proto *) new->src->proto;
+ struct rip_proto *p = rip_rte_proto(new);
u32 new_metric = ea_get_int(new->attrs, &ea_rip_metric, p->infinity);
u32 old_metric = ea_get_int(old->attrs, &ea_rip_metric, p->infinity);
@@ -1151,8 +1160,7 @@ rip_init(struct proto_config *CF)
P->rt_notify = rip_rt_notify;
P->neigh_notify = rip_neigh_notify;
P->reload_routes = rip_reload_routes;
- P->rte_better = rip_rte_better;
- P->rte_igp_metric = rip_rte_igp_metric;
+ P->sources.class = &rip_rte_owner_class;
return P;
}
@@ -1227,7 +1235,7 @@ rip_reconfigure(struct proto *P, struct proto_config *CF)
static void
rip_get_route_info(rte *rte, byte *buf)
{
- struct rip_proto *p = (struct rip_proto *) rte->src->proto;
+ struct rip_proto *p = rip_rte_proto(rte);
u32 rt_metric = ea_get_int(rte->attrs, &ea_rip_metric, p->infinity);
u32 rt_tag = ea_get_int(rte->attrs, &ea_rip_tag, 0);
@@ -1359,6 +1367,12 @@ rip_dump(struct proto *P)
}
+static struct rte_owner_class rip_rte_owner_class = {
+ .get_route_info = rip_get_route_info,
+ .rte_better = rip_rte_better,
+ .rte_igp_metric = rip_rte_igp_metric,
+};
+
struct protocol proto_rip = {
.name = "RIP",
.template = "rip%d",
@@ -1372,7 +1386,6 @@ struct protocol proto_rip = {
.start = rip_start,
.shutdown = rip_shutdown,
.reconfigure = rip_reconfigure,
- .get_route_info = rip_get_route_info,
};
void
diff --git a/proto/static/static.c b/proto/static/static.c
index f0a514f7..65f3eccc 100644
--- a/proto/static/static.c
+++ b/proto/static/static.c
@@ -50,11 +50,14 @@
static inline struct rte_src * static_get_source(struct static_proto *p, uint i)
{ return i ? rt_get_source(&p->p, i) : p->p.main_source; }
+static inline void static_free_source(struct rte_src *src, uint i)
+{ if (i) rt_unlock_source(src); }
+
static void
static_announce_rte(struct static_proto *p, struct static_route *r)
{
+ struct rte_src *src;
ea_list *ea = NULL;
- struct rte_src *src = static_get_source(p, r->index);
ea_set_attr_u32(&ea, &ea_gen_preference, 0, p->p.main_channel->preference);
ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_STATIC);
@@ -114,6 +117,7 @@ static_announce_rte(struct static_proto *p, struct static_route *r)
return;
/* We skip rta_lookup() here */
+ src = static_get_source(p, r->index);
rte e0 = { .attrs = ea, .src = src, .net = r->net, }, *e = &e0;
/* Evaluate the filter */
@@ -121,6 +125,8 @@ static_announce_rte(struct static_proto *p, struct static_route *r)
f_eval_rte(r->cmds, e);
rte_update(p->p.main_channel, r->net, e, src);
+ static_free_source(src, r->index);
+
r->state = SRS_CLEAN;
return;
@@ -128,7 +134,9 @@ withdraw:
if (r->state == SRS_DOWN)
return;
+ src = static_get_source(p, r->index);
rte_update(p->p.main_channel, r->net, NULL, src);
+ static_free_source(src, r->index);
r->state = SRS_DOWN;
}
@@ -294,7 +302,11 @@ static void
static_remove_rte(struct static_proto *p, struct static_route *r)
{
if (r->state)
- rte_update(p->p.main_channel, r->net, NULL, static_get_source(p, r->index));
+ {
+ struct rte_src *src = static_get_source(p, r->index);
+ rte_update(p->p.main_channel, r->net, NULL, src);
+ static_free_source(src, r->index);
+ }
static_reset_rte(p, r);
}
@@ -437,6 +449,8 @@ static_postconfig(struct proto_config *CF)
static_index_routes(cf);
}
+static struct rte_owner_class static_rte_owner_class;
+
static struct proto *
static_init(struct proto_config *CF)
{
@@ -448,8 +462,7 @@ static_init(struct proto_config *CF)
P->neigh_notify = static_neigh_notify;
P->reload_routes = static_reload_routes;
- P->rte_better = static_rte_better;
- P->rte_mergable = static_rte_mergable;
+ P->sources.class = &static_rte_owner_class;
if (cf->igp_table_ip4)
p->igp_table_ip4 = cf->igp_table_ip4->table;
@@ -748,6 +761,11 @@ static_show(struct proto *P)
static_show_rt(r);
}
+static struct rte_owner_class static_rte_owner_class = {
+ .get_route_info = static_get_route_info,
+ .rte_better = static_rte_better,
+ .rte_mergable = static_rte_mergable,
+};
struct protocol proto_static = {
.name = "Static",
@@ -763,7 +781,6 @@ struct protocol proto_static = {
.shutdown = static_shutdown,
.reconfigure = static_reconfigure,
.copy_config = static_copy_config,
- .get_route_info = static_get_route_info,
};
void
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index 46b5a51d..f796a159 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -314,6 +314,7 @@ krt_learn_scan(struct krt_proto *p, rte *e)
ea_set_attr_u32(&e0.attrs, &ea_gen_preference, 0, p->p.main_channel->preference);
rte_update(p->p.main_channel, e->net, &e0, e0.src);
+ rt_unlock_source(e0.src);
}
static void
@@ -322,9 +323,9 @@ krt_learn_async(struct krt_proto *p, rte *e, int new)
if (new)
return krt_learn_scan(p, e);
- struct rte_src *src = rt_find_source(&p->p, krt_metric(e));
- if (src)
- rte_update(p->p.main_channel, e->net, NULL, src);
+ struct rte_src *src = rt_get_source(&p->p, krt_metric(e));
+ rte_update(p->p.main_channel, e->net, NULL, src);
+ rt_unlock_source(src);
}
#endif
@@ -683,7 +684,7 @@ krt_scan_timer_kick(struct krt_proto *p)
static int
krt_preexport(struct channel *C, rte *e)
{
- if (e->src->proto == C->proto)
+ if (e->src->owner == &C->proto->sources)
return -1;
if (!krt_capable(e))