summaryrefslogtreecommitdiff
path: root/nest/rt.h
diff options
context:
space:
mode:
Diffstat (limited to 'nest/rt.h')
-rw-r--r--nest/rt.h369
1 files changed, 272 insertions, 97 deletions
diff --git a/nest/rt.h b/nest/rt.h
index 5a8c5a19..6ee2ce9b 100644
--- a/nest/rt.h
+++ b/nest/rt.h
@@ -17,12 +17,19 @@
#include "lib/type.h"
#include "lib/fib.h"
#include "lib/route.h"
+#include "lib/event.h"
+#include "lib/rcu.h"
+#include "lib/io-loop.h"
+#include "lib/settle.h"
+
+#include <stdatomic.h>
struct ea_list;
struct protocol;
struct proto;
struct channel;
struct rte_src;
+struct hostcache;
struct symbol;
struct timer;
struct filter;
@@ -30,6 +37,10 @@ struct f_trie;
struct f_trie_walk_state;
struct cli;
+struct rt_cork_threshold {
+ u64 low, high;
+};
+
/*
* Master Routing Tables. Generally speaking, each of them contains a FIB
* with each entry pointing to a list of route entries representing routes
@@ -44,61 +55,99 @@ struct cli;
struct rtable_config {
node n;
char *name;
- struct rtable *table;
+ union rtable *table;
struct proto_config *krt_attached; /* Kernel syncer attached to this table */
uint addr_type; /* Type of address data stored in table (NET_*) */
- int gc_max_ops; /* Maximum number of operations before GC is run */
- int gc_min_time; /* Minimum time between two consecutive GC runs */
+ uint gc_threshold; /* Maximum number of operations before GC is run */
+ uint gc_period; /* Approximate time between two consecutive GC runs */
byte sorted; /* Routes of network are sorted according to rte_better() */
- byte internal; /* Internal table of a protocol */
byte trie_used; /* Rtable has attached trie */
- btime min_settle_time; /* Minimum settle time for notifications */
- btime max_settle_time; /* Maximum settle time for notifications */
+ byte debug; /* Whether to log */
+ struct rt_cork_threshold cork_threshold; /* Cork threshold values */
+ struct settle_config export_settle; /* Export announcement settler */
+ struct settle_config export_rr_settle;/* Export announcement settler config valid when any
+ route refresh is running */
};
struct rt_export_hook;
struct rt_export_request;
+struct rt_exporter;
+
+struct rt_exporter_class {
+ void (*start)(struct rt_exporter *, struct rt_export_request *);
+ void (*stop)(struct rt_export_hook *);
+ void (*done)(void *_rt_export_hook);
+};
struct rt_exporter {
+ const struct rt_exporter_class *class;
+ pool *rp;
list hooks; /* Registered route export hooks */
uint addr_type; /* Type of address data exported (NET_*) */
- struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *);
- void (*stop)(struct rt_export_hook *);
- void (*done)(struct rt_export_hook *);
};
-typedef struct rtable {
- resource r;
- node n; /* Node in list of all tables */
+struct rt_table_exporter {
+ struct rt_exporter e;
+ list pending; /* List of packed struct rt_pending_export */
+
+ struct rt_pending_export *first; /* First export to announce */
+ u64 next_seq; /* The next export will have this ID */
+};
+
+extern uint rtable_max_id;
+
+DEFINE_DOMAIN(rtable);
+
+/* The public part of rtable structure */
+#define RTABLE_PUBLIC \
+ resource r; \
+ node n; /* Node in list of all tables */ \
+ char *name; /* Name of this table */ \
+ uint addr_type; /* Type of address data stored in table (NET_*) */ \
+ uint id; /* Integer table ID for fast lookup */ \
+ DOMAIN(rtable) lock; /* Lock to take to access the private parts */ \
+ struct rtable_config *config; /* Configuration of this table */ \
+ struct birdloop *loop; /* Service thread */ \
+
+/* The complete rtable structure */
+struct rtable_private {
+ /* Once more the public part */
+ RTABLE_PUBLIC;
+
+ /* Here the private items not to be accessed without locking */
pool *rp; /* Resource pool to allocate everything from, including itself */
struct slab *rte_slab; /* Slab to allocate route objects */
struct fib fib;
struct f_trie *trie; /* Trie of prefixes defined in fib */
- char *name; /* Name of this table */
- uint addr_type; /* Type of address data stored in table (NET_*) */
int use_count; /* Number of protocols using this table */
u32 rt_count; /* Number of routes in the table */
list imports; /* Registered route importers */
- struct rt_exporter exporter; /* Exporter API structure */
+ struct rt_table_exporter exporter; /* Exporter API structure */
struct hmap id_map;
struct hostcache *hostcache;
- struct rtable_config *config; /* Configuration of this table */
struct config *deleted; /* Table doesn't exist in current configuration,
* delete as soon as use_count becomes 0 and remove
* obstacle from this routing table.
*/
- struct event *rt_event; /* Routing table event */
+ struct event *nhu_uncork_event; /* Helper event to schedule NHU on uncork */
+ struct settle export_settle; /* Export batching settle timer */
+ struct timer *prune_timer; /* Timer for periodic pruning / GC */
+ struct birdloop_flag_handler fh; /* Handler for simple events */
btime last_rt_change; /* Last time when route changed */
- btime base_settle_time; /* Start time of rtable settling interval */
btime gc_time; /* Time of last GC */
- int gc_counter; /* Number of operations since last GC */
+ uint gc_counter; /* Number of operations since last GC */
+ uint rr_counter; /* Number of currently running route refreshes,
+ in fact sum of (stale_set - stale_pruned) over all importers
+ + one for each TIS_FLUSHING importer */
byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */
byte prune_trie; /* Prune prefix trie during next table prune */
- byte hcu_scheduled; /* Hostcache update is scheduled */
byte nhu_state; /* Next Hop Update state */
- byte internal; /* This table is internal for some other object */
+ byte nhu_corked; /* Next Hop Update is corked with this state */
+ byte export_used; /* Pending Export pruning is scheduled */
+ byte cork_active; /* Cork has been activated */
+ struct rt_cork_threshold cork_threshold; /* Threshold for table cork */
struct fib_iterator prune_fit; /* Rtable prune FIB iterator */
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
struct f_trie *trie_new; /* New prefix trie defined during pruning */
@@ -107,69 +156,83 @@ typedef struct rtable {
u32 trie_old_lock_count; /* Old prefix trie locked by walks */
struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */
- list subscribers; /* Subscribers for notifications */
- struct timer *settle_timer; /* Settle time for notifications */
- list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */
struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */
+};
+
+/* The final union private-public rtable structure */
+typedef union rtable {
+ struct {
+ RTABLE_PUBLIC;
+ };
+ struct rtable_private priv;
} rtable;
-struct rt_subscription {
- node n;
- rtable *tab;
- void (*hook)(struct rt_subscription *b);
- void *data;
-};
+#define RT_IS_LOCKED(tab) DOMAIN_IS_LOCKED(rtable, (tab)->lock)
-struct rt_flowspec_link {
- node n;
- rtable *src;
- rtable *dst;
- u32 uc;
-};
+#define RT_LOCK(tab) ({ LOCK_DOMAIN(rtable, (tab)->lock); &(tab)->priv; })
+#define RT_UNLOCK(tab) UNLOCK_DOMAIN(rtable, (tab)->lock)
+#define RT_PRIV(tab) ({ ASSERT_DIE(RT_IS_LOCKED((tab))); &(tab)->priv; })
+#define RT_PUB(tab) SKIP_BACK(rtable, priv, tab)
+
+#define RT_LOCKED(tpub, tpriv) for (struct rtable_private *tpriv = RT_LOCK(tpub); tpriv; RT_UNLOCK(tpriv), (tpriv = NULL))
+#define RT_RETURN(tpriv, ...) do { RT_UNLOCK(tpriv); return __VA_ARGS__; } while (0)
+
+#define RT_PRIV_SAME(tpriv, tpub) (&(tpub)->priv == (tpriv))
+
+/* Flags for birdloop_flag() */
+#define RTF_CLEANUP 1
+#define RTF_NHU 2
+#define RTF_EXPORT 4
+
+extern struct rt_cork {
+ _Atomic uint active;
+ event_list queue;
+ event run;
+} rt_cork;
+
+static inline void rt_cork_acquire(void)
+{
+ atomic_fetch_add_explicit(&rt_cork.active, 1, memory_order_acq_rel);
+}
+
+static inline void rt_cork_release(void)
+{
+ if (atomic_fetch_sub_explicit(&rt_cork.active, 1, memory_order_acq_rel) == 1)
+ {
+ synchronize_rcu();
+ ev_send(&global_work_list, &rt_cork.run);
+ }
+}
+
+static inline int rt_cork_check(event *e)
+{
+ rcu_read_lock();
+
+ int corked = (atomic_load_explicit(&rt_cork.active, memory_order_acquire) > 0);
+ if (corked)
+ ev_send(&rt_cork.queue, e);
+
+ rcu_read_unlock();
+
+ return corked;
+}
-#define NHU_CLEAN 0
-#define NHU_SCHEDULED 1
-#define NHU_RUNNING 2
-#define NHU_DIRTY 3
typedef struct network {
- struct rte_storage *routes; /* Available routes for this network */
+ struct rte_storage *routes; /* Available routes for this network */
+ struct rt_pending_export *first, *last;
struct fib_node n; /* FIB flags reserved for kernel syncer */
} net;
-struct hostcache {
- slab *slab; /* Slab holding all hostentries */
- struct hostentry **hash_table; /* Hash table for hostentries */
- unsigned hash_order, hash_shift;
- unsigned hash_max, hash_min;
- unsigned hash_items;
- linpool *lp; /* Linpool for trie */
- struct f_trie *trie; /* Trie of prefixes that might affect hostentries */
- list hostentries; /* List of all hostentries */
- byte update_hostcache;
-};
-
-struct hostentry {
- node ln;
- ip_addr addr; /* IP address of host, part of key */
- ip_addr link; /* (link-local) IP address of host, used as gw
- if host is directly attached */
- struct rtable *tab; /* Dependent table, part of key */
- struct hostentry *next; /* Next in hash chain */
- unsigned hash_key; /* Hash key */
- unsigned uc; /* Use count */
- ea_list *src; /* Source attributes */
- byte nexthop_linkable; /* Nexthop list is completely non-device */
- u32 igp_metric; /* Chosen route IGP metric */
-};
-
struct rte_storage {
struct rte_storage *next; /* Next in chain */
struct rte rte; /* Route data */
};
-#define RTE_COPY(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL)
-#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL)
+#define RTE_COPY(r) ((r) ? (r)->rte : (rte) {})
+#define RTE_COPY_VALID(r) (((r) && (rte_is_valid(&(r)->rte))) ? (r)->rte : (rte) {})
+#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL)
+#define RTE_VALID_OR_NULL(r) (((r) && (rte_is_valid(&(r)->rte))) ? &((r)->rte) : NULL)
/* Table-channel connections */
@@ -178,12 +241,13 @@ struct rt_import_request {
char *name;
u8 trace_routes;
+ event_list *list; /* Where to schedule announce events */
+
void (*dump_req)(struct rt_import_request *req);
void (*log_state_change)(struct rt_import_request *req, u8 state);
/* Preimport is called when the @new route is just-to-be inserted, replacing @old.
* Return a route (may be different or modified in-place) to continue or NULL to withdraw. */
int (*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old);
- struct rte *(*rte_modify)(struct rte *, struct linpool *);
};
struct rt_import_hook {
@@ -200,15 +264,23 @@ struct rt_import_hook {
u32 withdraws_accepted; /* Number of route withdraws accepted and processed */
} stats;
+ u64 flush_seq; /* Table export seq when the channel announced flushing */
btime last_state_change; /* Time of last state transition */
u8 import_state; /* IS_* */
+ u8 stale_set; /* Set this stale_cycle to imported routes */
+ u8 stale_valid; /* Routes with this stale_cycle and bigger are considered valid */
+ u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */
+ u8 stale_pruning; /* Last prune started when this value was set at stale_valid */
void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */
+ event announce_event; /* This event announces table updates */
};
struct rt_pending_export {
+ struct rt_pending_export * _Atomic next; /* Next export for the same destination */
struct rte_storage *new, *new_best, *old, *old_best;
+ u64 seq; /* Sequential ID (table-local) of the pending export */
};
struct rt_export_request {
@@ -218,6 +290,8 @@ struct rt_export_request {
u8 trace_routes;
u8 addr_mode; /* Network prefilter mode (TE_ADDR_*) */
+ event_list *list; /* Where to schedule export events */
+
/* There are two methods of export. You can either request feeding every single change
* or feeding the whole route feed. In case of regular export, &export_one is preferred.
* Anyway, when feeding, &export_bulk is preferred, falling back to &export_one.
@@ -237,7 +311,6 @@ struct rt_export_hook {
struct rt_exporter *table; /* The connected table */
pool *pool;
- linpool *lp;
struct rt_export_request *req; /* The requestor */
@@ -247,24 +320,44 @@ struct rt_export_hook {
u32 withdraws_received; /* Number of route withdraws received */
} stats;
+ btime last_state_change; /* Time of last state transition */
+
+ _Atomic u8 export_state; /* Route export state (TES_*, see below) */
+ struct event event; /* Event running all the export operations */
+
+ struct bmap seq_map; /* Keep track which exports were already procesed */
+
+ void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */
+};
+
+struct rt_table_export_hook {
+ union {
+ struct rt_export_hook h;
+ struct { /* Overriding the parent structure beginning */
+ node _n;
+ struct rt_table_exporter *table;
+ };
+ };
+
union {
struct fib_iterator feed_fit; /* Routing table iterator used during feeding */
struct {
struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */
struct f_trie *walk_lock; /* Locked trie for walking */
+ union { /* Last net visited but not processed */
+ net_addr walk_last;
+ net_addr_ip4 walk_last_ip4;
+ net_addr_ip6 walk_last_ip6;
+ };
};
- u32 hash_iter; /* Iterator over hash */
};
- btime last_state_change; /* Time of last state transition */
+ struct rt_pending_export *_Atomic last_export;/* Last export processed */
+ struct rt_pending_export *rpe_next; /* Next pending export to process */
u8 refeed_pending; /* Refeeding and another refeed is scheduled */
- u8 export_state; /* Route export state (TES_*, see below) */
u8 feed_type; /* Which feeding method is used (TFT_*, see below) */
- struct event *event; /* Event running all the export operations */
-
- void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */
};
#define TIS_DOWN 0
@@ -276,6 +369,7 @@ struct rt_export_hook {
#define TIS_MAX 6
#define TES_DOWN 0
+#define TES_HUNGRY 1
#define TES_FEEDING 2
#define TES_READY 3
#define TES_STOP 4
@@ -293,7 +387,8 @@ struct rt_export_hook {
#define TFT_HASH 3
void rt_request_import(rtable *tab, struct rt_import_request *req);
-void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req);
+void rt_request_export(rtable *tab, struct rt_export_request *req);
+void rt_request_export_other(struct rt_exporter *tab, struct rt_export_request *req);
void rt_export_once(struct rt_exporter *tab, struct rt_export_request *req);
@@ -310,6 +405,36 @@ void rt_set_export_state(struct rt_export_hook *hook, u8 state);
void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src);
+/*
+ * For table export processing
+ */
+
+/* Get next rpe. If src is given, it must match. */
+struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src);
+
+/* Walk all rpe's */
+#define RPE_WALK(first, it, src) \
+ for (struct rt_pending_export *it = (first); it; it = rpe_next(it, (src)))
+
+/* Mark the pending export processed */
+void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe);
+
+#define rpe_mark_seen_all(hook, first, src) \
+ RPE_WALK((first), _rpe, (src)) rpe_mark_seen((hook), _rpe)
+
+/* Get pending export seen status */
+int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe);
+
+/*
+ * For rt_export_hook and rt_exporter inheritance
+ */
+
+void rt_init_export(struct rt_exporter *re, struct rt_export_hook *hook);
+struct rt_export_hook *rt_alloc_export(struct rt_exporter *re, uint size);
+void rt_stop_export_common(struct rt_export_hook *hook);
+void rt_export_stopped(struct rt_export_hook *hook);
+void rt_exporter_init(struct rt_exporter *re);
+
/* Types of route announcement, also used as flags */
#define RA_UNDEF 0 /* Undefined RA type */
#define RA_OPTIMAL 1 /* Announcement of optimal route change */
@@ -323,6 +448,49 @@ void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, st
#define RIC_REJECT -1 /* Rejected by protocol */
#define RIC_DROP -2 /* Silently dropped by protocol */
+/*
+ * Next hop update data structures
+ */
+
+#define NHU_CLEAN 0
+#define NHU_SCHEDULED 1
+#define NHU_RUNNING 2
+#define NHU_DIRTY 3
+
+struct hostentry {
+ node ln;
+ ip_addr addr; /* IP address of host, part of key */
+ ip_addr link; /* (link-local) IP address of host, used as gw
+ if host is directly attached */
+ rtable *tab; /* Dependent table, part of key */
+ struct hostentry *next; /* Next in hash chain */
+ unsigned hash_key; /* Hash key */
+ unsigned uc; /* Use count */
+ ea_list *src; /* Source attributes */
+ byte nexthop_linkable; /* Nexthop list is completely non-device */
+ u32 igp_metric; /* Chosen route IGP metric */
+};
+
+struct hostcache {
+ slab *slab; /* Slab holding all hostentries */
+ struct hostentry **hash_table; /* Hash table for hostentries */
+ unsigned hash_order, hash_shift;
+ unsigned hash_max, hash_min;
+ unsigned hash_items;
+ linpool *lp; /* Linpool for trie */
+ struct f_trie *trie; /* Trie of prefixes that might affect hostentries */
+ list hostentries; /* List of all hostentries */
+ event update;
+ struct rt_export_request req; /* Notifier */
+};
+
+struct rt_flowspec_link {
+ rtable *src;
+ rtable *dst;
+ u32 uc;
+ struct rt_export_request req;
+};
+
#define rte_update channel_rte_import
/**
* rte_update - enter a new update to a routing table
@@ -363,33 +531,38 @@ struct config;
void rt_init(void);
void rt_preconfig(struct config *);
+void rt_postconfig(struct config *);
void rt_commit(struct config *new, struct config *old);
-void rt_lock_table(rtable *);
-void rt_unlock_table(rtable *);
-struct f_trie * rt_lock_trie(rtable *tab);
-void rt_unlock_trie(rtable *tab, struct f_trie *trie);
-void rt_subscribe(rtable *tab, struct rt_subscription *s);
-void rt_unsubscribe(struct rt_subscription *s);
+void rt_lock_table_priv(struct rtable_private *, const char *file, uint line);
+void rt_unlock_table_priv(struct rtable_private *, const char *file, uint line);
+static inline void rt_lock_table_pub(rtable *t, const char *file, uint line)
+{ RT_LOCKED(t, tt) rt_lock_table_priv(tt, file, line); }
+static inline void rt_unlock_table_pub(rtable *t, const char *file, uint line)
+{ RT_LOCKED(t, tt) rt_unlock_table_priv(tt, file, line); }
+
+#define rt_lock_table(t) _Generic((t), rtable *: rt_lock_table_pub, \
+ struct rtable_private *: rt_lock_table_priv)((t), __FILE__, __LINE__)
+#define rt_unlock_table(t) _Generic((t), rtable *: rt_unlock_table_pub, \
+ struct rtable_private *: rt_unlock_table_priv)((t), __FILE__, __LINE__)
+
+struct f_trie * rt_lock_trie(struct rtable_private *tab);
+void rt_unlock_trie(struct rtable_private *tab, struct f_trie *trie);
void rt_flowspec_link(rtable *src, rtable *dst);
void rt_flowspec_unlink(rtable *src, rtable *dst);
rtable *rt_setup(pool *, struct rtable_config *);
-static inline void rt_shutdown(rtable *r) { rfree(r->rp); }
-static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); }
-static inline net *net_find_valid(rtable *tab, const net_addr *addr)
+static inline net *net_find(struct rtable_private *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); }
+static inline net *net_find_valid(struct rtable_private *tab, const net_addr *addr)
{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; }
-static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
-net *net_get(rtable *tab, const net_addr *addr);
-net *net_route(rtable *tab, const net_addr *n);
+static inline net *net_get(struct rtable_private *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
+net *net_route(struct rtable_private *tab, const net_addr *n);
int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter);
rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent);
-void rt_refresh_begin(rtable *t, struct rt_import_request *);
-void rt_refresh_end(rtable *t, struct rt_import_request *);
+void rt_refresh_begin(struct rt_import_request *);
+void rt_refresh_end(struct rt_import_request *);
void rt_modify_stale(rtable *t, struct rt_import_request *);
-void rt_schedule_prune(rtable *t);
+void rt_schedule_prune(struct rtable_private *t);
void rte_dump(struct rte_storage *);
-void rte_free(struct rte_storage *);
-struct rte_storage *rte_store(const rte *, net *net, rtable *);
void rt_dump(rtable *);
void rt_dump_all(void);
void rt_dump_hooks(rtable *);
@@ -399,6 +572,8 @@ void rt_reload_channel_abort(struct channel *c);
void rt_refeed_channel(struct channel *c);
void rt_prune_sync(rtable *t, int all);
struct rtable_config *rt_new_table(struct symbol *s, uint addr_type);
+void rt_new_default_table(struct symbol *s);
+struct rtable_config *rt_get_default_table(struct config *cf, uint addr_type);
static inline int rt_is_ip(rtable *tab)
{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); }
@@ -448,7 +623,7 @@ struct rt_show_data {
void rt_show(struct rt_show_data *);
struct rt_show_data_rtable * rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name);
-struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, struct rtable *t);
+struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t);
/* Value of table definition mode in struct rt_show_data */
#define RSD_TDB_DEFAULT 0 /* no table specified */
@@ -475,7 +650,7 @@ struct hostentry_adata {
};
void
-ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]);
+ea_set_hostentry(ea_list **to, rtable *dep, rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]);
void ea_show_hostentry(const struct adata *ad, byte *buf, uint size);
void ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad);