summaryrefslogtreecommitdiff
path: root/lib/route.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/route.h')
-rw-r--r--lib/route.h131
1 files changed, 116 insertions, 15 deletions
diff --git a/lib/route.h b/lib/route.h
index 68596316..eae251e7 100644
--- a/lib/route.h
+++ b/lib/route.h
@@ -10,12 +10,17 @@
#ifndef _BIRD_LIB_ROUTE_H_
#define _BIRD_LIB_ROUTE_H_
+#undef RT_SOURCE_DEBUG
+
#include "lib/type.h"
+#include "lib/rcu.h"
+#include "lib/hash.h"
+#include "lib/event.h"
struct network;
struct proto;
struct cli;
-
+struct rtable_private;
typedef struct rte {
struct ea_list *attrs; /* Attributes of this route */
@@ -29,12 +34,10 @@ typedef struct rte {
u8 generation; /* If this route import is based on other previously exported route,
this value should be 1 + MAX(generation of the parent routes).
Otherwise the route is independent and this value is zero. */
+ u8 stale_cycle; /* Auxiliary value for route refresh */
} rte;
#define REF_FILTERED 2 /* Route is rejected by import filter */
-#define REF_STALE 4 /* Route is stale in a refresh cycle */
-#define REF_DISCARD 8 /* Route is scheduled for discard */
-#define REF_MODIFY 16 /* Route is scheduled for modify */
#define REF_PENDING 32 /* Route has not propagated completely yet */
/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
@@ -45,19 +48,114 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED);
struct rte_src {
struct rte_src *next; /* Hash chain */
- struct proto *proto; /* Protocol the source is based on */
+ struct rte_owner *owner; /* Route source owner */
u32 private_id; /* Private ID, assigned by the protocol */
u32 global_id; /* Globally unique ID of the source */
- unsigned uc; /* Use count */
+ _Atomic u64 uc; /* Use count */
+};
+
+struct rte_owner_class {
+ void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */
+ int (*rte_better)(struct rte *, struct rte *);
+ int (*rte_mergable)(struct rte *, struct rte *);
+ u32 (*rte_igp_metric)(const rte *);
};
+struct rte_owner {
+ struct rte_owner_class *class;
+ int (*rte_recalculate)(struct rtable_private *, struct network *, struct rte *, struct rte *, struct rte *);
+ HASH(struct rte_src) hash;
+ const char *name;
+ u32 hash_key;
+ u32 uc;
+ event_list *list;
+ event *prune;
+ event *stop;
+};
+
+DEFINE_DOMAIN(attrs);
+extern DOMAIN(attrs) attrs_domain;
+
+#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain)
+#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain)
+
+#define RTE_SRC_PU_SHIFT 44
+#define RTE_SRC_IN_PROGRESS (1ULL << RTE_SRC_PU_SHIFT)
+
+/* Get a route source. This also locks the source, therefore the caller has to
+ * unlock the source after the route has been propagated. */
+struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id);
+#define rt_get_source(p, id) rt_get_source_o(&(p)->sources, (id))
-struct rte_src *rt_find_source(struct proto *p, u32 id);
-struct rte_src *rt_get_source(struct proto *p, u32 id);
struct rte_src *rt_find_source_global(u32 id);
-static inline void rt_lock_source(struct rte_src *src) { src->uc++; }
-static inline void rt_unlock_source(struct rte_src *src) { src->uc--; }
-void rt_prune_sources(void);
+
+#ifdef RT_SOURCE_DEBUG
+#define rt_lock_source _rt_lock_source_internal
+#define rt_unlock_source _rt_unlock_source_internal
+#endif
+
+static inline void rt_lock_source(struct rte_src *src)
+{
+ /* Locking a source is trivial; somebody already holds it so we just increase
+ * the use count. Nothing can be freed underneath our hands. */
+ u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel);
+ ASSERT_DIE(uc > 0);
+}
+
+static inline void rt_unlock_source(struct rte_src *src)
+{
+ /* Unlocking is tricky. We do it lockless so at the same time, the prune
+ * event may be running, therefore if the unlock gets us to zero, it must be
+ * the last thing in this routine, otherwise the prune routine may find the
+ * source's usecount zeroed, freeing it prematurely.
+ *
+ * The usecount is split into two parts:
+ * the top 20 bits are an in-progress indicator
+ * the bottom 44 bits keep the actual usecount.
+ *
+ * Therefore at most 1 million of writers can simultaneously unlock the same
+ * source, while at most ~17T different routes can reference it. Both limits
+ * are insanely high from the 2022 point of view. Let's suppose that when 17T
+ * routes or 1M writers get real, we get also 128bit atomic variables in the
+ * C norm. */
+
+ /* First, we push the in-progress indicator */
+ u64 uc = atomic_fetch_add_explicit(&src->uc, RTE_SRC_IN_PROGRESS, memory_order_acq_rel);
+
+ /* Then we split the indicator to its parts. Remember, we got the value before the operation happened. */
+ u64 pending = (uc >> RTE_SRC_PU_SHIFT) + 1;
+ uc &= RTE_SRC_IN_PROGRESS - 1;
+
+ /* We per-use the RCU critical section indicator to make the prune event wait
+ * until we finish here in the rare case we get preempted. */
+ rcu_read_lock();
+
+ /* Obviously, there can't be more pending unlocks than the usecount itself */
+ if (uc == pending)
+ /* If we're the last unlocker, schedule the owner's prune event */
+ ev_send(src->owner->list, src->owner->prune);
+ else
+ ASSERT_DIE(uc > pending);
+
+ /* And now, finally, simultaneously pop the in-progress indicator and the
+ * usecount, possibly allowing the source pruning routine to free this structure */
+ atomic_fetch_sub_explicit(&src->uc, RTE_SRC_IN_PROGRESS + 1, memory_order_acq_rel);
+
+ /* ... and to reduce the load a bit, the source pruning routine will better wait for
+ * RCU synchronization instead of a busy loop. */
+ rcu_read_unlock();
+}
+
+#ifdef RT_SOURCE_DEBUG
+#undef rt_lock_source
+#undef rt_unlock_source
+
+#define rt_lock_source(x) ( log(L_INFO "Lock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_lock_source_internal(x) )
+#define rt_unlock_source(x) ( log(L_INFO "Unlock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_unlock_source_internal(x) )
+#endif
+
+void rt_init_sources(struct rte_owner *, const char *name, event_list *list);
+void rt_destroy_sources(struct rte_owner *, event *);
/*
* Route Attributes
@@ -159,7 +257,7 @@ typedef struct ea_list {
struct ea_storage {
struct ea_storage *next_hash; /* Next in hash chain */
struct ea_storage **pprev_hash; /* Previous in hash chain */
- u32 uc; /* Use count */
+ _Atomic u32 uc; /* Use count */
u32 hash_key; /* List hash */
ea_list l[0]; /* The list itself */
};
@@ -425,15 +523,18 @@ static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED
static inline struct ea_storage *ea_get_storage(ea_list *r)
{
ASSERT_DIE(ea_is_cached(r));
- return SKIP_BACK(struct ea_storage, l, r);
+ return SKIP_BACK(struct ea_storage, l[0], r);
}
-static inline ea_list *ea_clone(ea_list *r) { ea_get_storage(r)->uc++; return r; }
+static inline ea_list *ea_clone(ea_list *r) {
+ ASSERT_DIE(0 < atomic_fetch_add_explicit(&ea_get_storage(r)->uc, 1, memory_order_acq_rel));
+ return r;
+}
void ea__free(struct ea_storage *r);
static inline void ea_free(ea_list *l) {
if (!l) return;
struct ea_storage *r = ea_get_storage(l);
- if (!--r->uc) ea__free(r);
+ if (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel)) ea__free(r);
}
void ea_dump(ea_list *);