diff options
Diffstat (limited to 'nest/route.h')
-rw-r--r-- | nest/route.h | 521 |
1 files changed, 369 insertions, 152 deletions
diff --git a/nest/route.h b/nest/route.h index f5fc9e31..f060ad63 100644 --- a/nest/route.h +++ b/nest/route.h @@ -2,6 +2,7 @@ * BIRD Internet Routing Daemon -- Routing Table * * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2019--2021 Maria Matejka <mq@jmq.cz> * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -10,13 +11,19 @@ #define _BIRD_ROUTE_H_ #include "lib/lists.h" +#include "lib/event.h" #include "lib/bitmap.h" #include "lib/resource.h" #include "lib/net.h" +#include "lib/hash.h" +#include "lib/event.h" + +#include <stdatomic.h> struct ea_list; struct protocol; struct proto; +struct channel; struct rte_src; struct symbol; struct timer; @@ -139,61 +146,90 @@ void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src) * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. */ -struct rtable_config { - node n; - char *name; - struct rtable *table; - struct proto_config *krt_attached; /* Kernel syncer attached to this table */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - int gc_max_ops; /* Maximum number of operations before GC is run */ - int gc_min_time; /* Minimum time between two consecutive GC runs */ - byte sorted; /* Routes of network are sorted according to rte_better() */ - byte internal; /* Internal table of a protocol */ - btime min_settle_time; /* Minimum settle time for notifications */ - btime max_settle_time; /* Maximum settle time for notifications */ -}; - -typedef struct rtable { - resource r; - node n; /* Node in list of all tables */ +typedef struct rtable_private { +#define RTABLE_PUBLIC \ + resource r; \ + node n; /* Node in list of all tables */ \ + struct birdloop *loop; /* This loop runs the table */ \ + char *name; /* Name of this table */ \ + uint addr_type; /* Type of address data stored in table (NET_*) */ \ + struct rtable_config *config; /* Configuration of this table */ \ + struct event *nhu_event; /* Event to update next hops */ \ 
+ _Atomic byte nhu_state; /* Next Hop Update state */ \ + + RTABLE_PUBLIC; pool *rp; /* Resource pool to allocate everything from, including itself */ + struct slab *rte_slab; /* Slab to allocate route objects */ struct fib fib; - char *name; /* Name of this table */ - list channels; /* List of attached channels (struct channel) */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - int pipe_busy; /* Pipe loop detection */ int use_count; /* Number of protocols using this table */ u32 rt_count; /* Number of routes in the table */ + u32 rr_count; /* Number of running route refresh requests */ - byte internal; /* Internal table of a protocol */ + list imports; /* Registered route importers */ + list exports; /* Registered route exporters */ struct hmap id_map; struct hostcache *hostcache; - struct rtable_config *config; /* Configuration of this table */ - struct config *deleted; /* Table doesn't exist in current configuration, - * delete as soon as use_count becomes 0 and remove - * obstacle from this routing table. 
- */ - struct event *rt_event; /* Routing table event */ + struct event *prune_event; /* Event to prune abandoned routes */ + struct event *announce_event; /* Event to announce pending exports */ + struct event *ec_event; /* Event to prune finished exports */ + struct event *hcu_event; /* Event to update host cache */ + void (*delete)(void *); /* Delete callback (in parent loop context) */ btime last_rt_change; /* Last time when route changed */ btime base_settle_time; /* Start time of rtable settling interval */ btime gc_time; /* Time of last GC */ int gc_counter; /* Number of operations since last GC */ byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ - byte hcu_scheduled; /* Hostcache update is scheduled */ - byte nhu_state; /* Next Hop Update state */ + + byte cork_active; /* Congestion control activated */ + struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ + struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ + + linpool *nhu_lp; /* Linpool used for NHU */ list subscribers; /* Subscribers for notifications */ struct timer *settle_timer; /* Settle time for notifications */ + + list pending_exports; /* List of packed struct rt_pending_export */ + + struct rt_pending_export *first_export; /* First export to announce */ + u64 next_export_seq; /* The next export will have this ID */ +} rtable_private; + +typedef union { + struct { RTABLE_PUBLIC }; + rtable_private priv; } rtable; +#define RT_LOCK(tab) ({ birdloop_enter((tab)->loop); &(tab)->priv; }) +#define RT_UNLOCK(tab) birdloop_leave((tab)->loop) +#define RT_PRIV(tab) ({ ASSERT_DIE(birdloop_inside((tab)->loop)); &(tab)->priv; }) + +#define RT_LOCKED(tpub, tpriv) for (rtable_private *tpriv = RT_LOCK(tpub); tpriv; RT_UNLOCK(tpriv), (tpriv = NULL)) + +struct rtable_config { + node n; + char *name; + void *owner; /* Main config if global table, channel_aux_table if channel table */ + rtable 
*table; + struct proto_config *krt_attached; /* Kernel syncer attached to this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + int gc_max_ops; /* Maximum number of operations before GC is run */ + int gc_min_time; /* Minimum time between two consecutive GC runs */ + byte sorted; /* Routes of network are sorted according to rte_better() */ + btime min_settle_time; /* Minimum settle time for notifications */ + btime max_settle_time; /* Maximum settle time for notifications */ + btime min_rr_settle_time; /* Minimum settle time for notifications when route refresh is running */ + btime max_rr_settle_time; /* Maximum settle time for notifications when route refresh is running */ + uint cork_limit; /* Amount of routes to be pending on export to cork imports */ +}; + struct rt_subscription { node n; rtable *tab; - void (*hook)(struct rt_subscription *b); - void *data; + event *event; }; #define NHU_CLEAN 0 @@ -202,7 +238,8 @@ struct rt_subscription { #define NHU_DIRTY 3 typedef struct network { - struct rte *routes; /* Available routes for this network */ + struct rte_storage *routes; /* Available routes for this network */ + struct rt_pending_export *last, *first; /* Routes with unfinished exports */ struct fib_node n; /* FIB flags reserved for kernel syncer */ } net; @@ -223,7 +260,7 @@ struct hostentry { ip_addr addr; /* IP address of host, part of key */ ip_addr link; /* (link-local) IP address of host, used as gw if host is directly attached */ - struct rtable *tab; /* Dependent table, part of key */ + rtable *tab; /* Dependent table, part of key */ struct hostentry *next; /* Next in hash chain */ unsigned hash_key; /* Hash key */ unsigned uc; /* Use count */ @@ -234,64 +271,178 @@ struct hostentry { }; typedef struct rte { - struct rte *next; - net *net; /* Network this RTE belongs to */ - struct channel *sender; /* Channel used to send the route to the routing table */ struct rta *attrs; /* Attributes of this route */ + const 
net_addr *net; /* Network this RTE belongs to */ + struct rte_src *src; /* Route source that created the route */ + struct rt_import_hook *sender; /* Import hook used to send the route to the routing table */ + btime lastmod; /* Last modified (set by table) */ u32 id; /* Table specific route id */ - byte flags; /* Flags (REF_...) */ + byte flags; /* Table-specific flags */ byte pflags; /* Protocol-specific flags */ - word pref; /* Route preference */ - btime lastmod; /* Last modified */ - union { /* Protocol-dependent data (metrics etc.) */ -#ifdef CONFIG_RIP - struct { - struct iface *from; /* Incoming iface */ - u8 metric; /* RIP metric */ - u16 tag; /* External route tag */ - } rip; -#endif -#ifdef CONFIG_OSPF - struct { - u32 metric1, metric2; /* OSPF Type 1 and Type 2 metrics */ - u32 tag; /* External route tag */ - u32 router_id; /* Router that originated this route */ - } ospf; -#endif -#ifdef CONFIG_BGP - struct { - u8 suppressed; /* Used for deterministic MED comparison */ - s8 stale; /* Route is LLGR_STALE, -1 if unknown */ - } bgp; -#endif -#ifdef CONFIG_BABEL - struct { - u16 seqno; /* Babel seqno */ - u16 metric; /* Babel metric */ - u64 router_id; /* Babel router id */ - } babel; -#endif - struct { /* Routes generated by krt sync (both temporary and inherited ones) */ - s8 src; /* Alleged route source (see krt.h) */ - u8 proto; /* Kernel source protocol ID */ - u8 seen; /* Seen during last scan */ - u8 best; /* Best route in network, propagated to core */ - u32 metric; /* Kernel metric */ - } krt; - } u; + u8 generation; /* If this route import is based on other previously exported route, + this value should be 1 + MAX(generation of the parent routes). + Otherwise the route is independent and this value is zero. 
*/ + u8 stale_cycle; /* Auxiliary value for route refresh */ } rte; -#define REF_COW 1 /* Copy this rte on write */ +struct rte_storage { + struct rte_storage *next; /* Next in chain */ + struct rte rte; /* Route data */ +}; + +#define RTES_CLONE(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL) +#define RTES_OR_NULL(r) ((r) ? &((r)->rte) : NULL) + #define REF_FILTERED 2 /* Route is rejected by import filter */ -#define REF_STALE 4 /* Route is stale in a refresh cycle */ -#define REF_DISCARD 8 /* Route is scheduled for discard */ -#define REF_MODIFY 16 /* Route is scheduled for modify */ +#define REF_USE_STALE 4 /* Do not reset route's stale_cycle to the actual value */ /* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ -static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); } +static inline int rte_is_valid(const rte *r) { return r && !(r->flags & REF_FILTERED); } /* Route just has REF_FILTERED flag */ -static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); } +static inline int rte_is_filtered(const rte *r) { return !!(r->flags & REF_FILTERED); } + + +/* Table-channel connections */ + +struct rt_import_request { + struct rt_import_hook *hook; /* The table part of importer */ + char *name; + u8 trace_routes; + + event_list *list; /* Where to schedule import events */ + + void (*dump_req)(struct rt_import_request *req); + void (*log_state_change)(struct rt_import_request *req, u8 state); + /* Preimport is called when the @new route is just-to-be inserted, replacing @old. + * Return a route (may be different or modified in-place) to continue or NULL to withdraw. 
*/ + struct rte *(*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); +}; + +struct rt_import_hook { + node n; + rtable *table; /* The connected table */ + struct rt_import_request *req; /* The requestor */ + + struct rt_import_stats { + /* Import - from protocol to core */ + u32 pref; /* Number of routes selected as best in the (adjacent) routing table */ + u32 updates_ignored; /* Number of route updates rejected as already in route table */ + u32 updates_accepted; /* Number of route updates accepted and imported */ + u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } stats; + + u64 flush_seq; /* Table export seq when the channel announced flushing */ + btime last_state_change; /* Time of last state transition */ + + u8 import_state; /* IS_* */ + u8 stale_set; /* Set this stale_cycle to imported routes */ + u8 stale_valid; /* Routes with this stale_cycle and bigger are considered valid */ + u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */ + u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ + + struct event *export_announce_event; /* Event to run to announce new exports */ + struct event *stopped; /* Event to run when import is stopped */ +}; + +struct rt_pending_export { + struct rt_pending_export * _Atomic next; /* Next export for the same destination */ + struct rte_storage *new, *new_best, *old, *old_best; + u64 seq; /* Sequential ID (table-local) of the pending export */ +}; + +struct rt_export_request { + struct rt_export_hook *hook; /* Table part of the export */ + char *name; + u8 trace_routes; + + event_list *list; /* Where to schedule export events */ + + /* There are two methods of export. You can either request feeding every single change + * or feeding the whole route feed. In case of regular export, &export_one is preferred. 
+ * Anyway, when feeding, &export_bulk is preferred, falling back to &export_one. + * Thus, for RA_OPTIMAL, &export_one is only set, + * for RA_MERGED and RA_ACCEPTED, &export_bulk is only set + * and for RA_ANY, both are set to accommodate for feeding all routes but receiving single changes + */ + void (*export_one)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); + void (*export_bulk)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + void (*dump_req)(struct rt_export_request *req); + void (*log_state_change)(struct rt_export_request *req, u8); +}; + +struct rt_export_hook { + node n; + rtable *table; /* The connected table */ + + pool *pool; + + struct rt_export_request *req; /* The requestor */ + + struct rt_export_stats { + /* Export - from core to protocol */ + u32 updates_received; /* Number of route updates received */ + u32 withdraws_received; /* Number of route withdraws received */ + } stats; + + struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + + struct bmap seq_map; /* Keep track of which exports were already processed */ + + struct rt_pending_export * _Atomic last_export;/* Last export processed */ + struct rt_pending_export *rpe_next; /* Next pending export to process */ + + btime last_state_change; /* Time of last state transition */ + + u8 refeed_pending; /* Refeeding and another refeed is scheduled */ + _Atomic u8 export_state; /* Route export state (TES_*, see below) */ + + struct event *event; /* Event running all the export operations */ + + void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ +}; + +extern struct event_cork rt_cork; + +#define TIS_DOWN 0 +#define TIS_UP 1 +#define TIS_STOP 2 +#define TIS_FLUSHING 3 +#define TIS_WAITING 4 +#define TIS_CLEARED 5 +#define TIS_MAX 6 + +#define TES_DOWN 0 +#define TES_HUNGRY 1 +#define TES_FEEDING 2 +#define TES_READY 3 +#define TES_STOP 4 
+#define TES_MAX 5 + +void rt_request_import(rtable *tab, struct rt_import_request *req); +void rt_request_export(rtable *tab, struct rt_export_request *req); + +void rt_stop_import(struct rt_import_request *, struct event *stopped); +void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *)); + +const char *rt_import_state_name(u8 state); +const char *rt_export_state_name(u8 state); + +static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; } +static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; } + +void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); + +/* Get next rpe. If src is given, it must match. */ +struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src); + +/* Mark the pending export processed */ +void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); + +/* Get pending export seen status */ +int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); /* Types of route announcement, also used as flags */ @@ -307,56 +458,75 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); #define RIC_REJECT -1 /* Rejected by protocol */ #define RIC_DROP -2 /* Silently dropped by protocol */ +#define rte_update channel_rte_import +/** + * rte_update - enter a new update to a routing table + * @c: channel doing the update + * @net: network address + * @rte: a &rte representing the new route + * @src: old route source identifier + * + * This function imports a new route to the appropriate table (via the channel). + * Table keys are @net (obligatory) and @src. + * Both the @net and @rte pointers can be local. + * + * The route attributes (@rte->attrs) are obligatory. They can be also allocated + * locally. 
Anyway, if you use an already-cached attribute object, you shall + * call rta_clone() on that object yourself. (This semantics may change in future.) + * + * If the route attributes are local, you may set @rte->attrs->src to NULL, then + * the protocol's default route source will be supplied. + * + * When rte_update() gets a route, it automatically validates it. This includes + * checking for validity of the given network and next hop addresses and also + * checking for host-scope or link-scope routes. Then the import filters are + * processed and if accepted, the route is passed to route table recalculation. + * + * The accepted routes are then inserted into the table, replacing the old route + * for the same @net identified by @src. Then the route is announced + * to all the channels connected to the table using the standard export mechanism. + * Setting @rte to NULL makes this a withdraw, otherwise @rte->src must be the same + * as @src. + * + * All memory used for temporary allocations is taken from a special linpool + * @rte_update_pool and freed when rte_update() finishes. + */ +void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src); + extern list routing_tables; struct config; void rt_init(void); void rt_preconfig(struct config *); void rt_commit(struct config *new, struct config *old); -void rt_lock_table(rtable *); -void rt_unlock_table(rtable *); +void rt_lock_table(rtable_private *); +void rt_unlock_table(rtable_private *); void rt_subscribe(rtable *tab, struct rt_subscription *s); void rt_unsubscribe(struct rt_subscription *s); rtable *rt_setup(pool *, struct rtable_config *); -static inline void rt_shutdown(rtable *r) { rfree(r->rp); } -static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } -static inline net *net_find_valid(rtable *tab, const net_addr *addr) -{ net *n = net_find(tab, addr); return (n && rte_is_valid(n->routes)) ? 
n : NULL; } -static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } -void *net_route(rtable *tab, const net_addr *n); +static inline net *net_find(rtable_private *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_find_valid(rtable_private *tab, const net_addr *addr) +{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; } +static inline net *net_get(rtable_private *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +void *net_route(rtable_private *tab, const net_addr *n); int net_roa_check(rtable *tab, const net_addr *n, u32 asn); -rte *rte_find(net *net, struct rte_src *src); -rte *rte_get_temp(struct rta *); -void rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); -/* rte_update() moved to protocol.h to avoid dependency conflicts */ -int rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter); -rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct channel *c); -void rt_refresh_end(rtable *t, struct channel *c); -void rt_modify_stale(rtable *t, struct channel *c); -void rt_schedule_prune(rtable *t); -void rte_dump(rte *); -void rte_free(rte *); -rte *rte_do_cow(rte *); -static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? 
rte_do_cow(r) : r; } -rte *rte_cow_rta(rte *r, linpool *lp); -void rte_init_tmp_attrs(struct rte *r, linpool *lp, uint max); -void rte_make_tmp_attr(struct rte *r, uint id, uint type, uintptr_t val); -void rte_make_tmp_attrs(struct rte **r, struct linpool *pool, struct rta **old_attrs); -uintptr_t rte_store_tmp_attr(struct rte *r, uint id); +int rt_examine(rtable_private *t, net_addr *a, struct channel *c, const struct filter *filter); +rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); + +void rt_refresh_begin(struct rt_import_request *); +void rt_refresh_end(struct rt_import_request *); +void rt_schedule_prune(rtable_private *t); +void rte_dump(struct rte_storage *); +void rte_free(struct rte_storage *, rtable_private *); +struct rte_storage *rte_store(const rte *, net *net, rtable_private *); void rt_dump(rtable *); void rt_dump_all(void); -int rt_feed_channel(struct channel *c); -void rt_feed_channel_abort(struct channel *c); -int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); -int rt_reload_channel(struct channel *c); -void rt_reload_channel_abort(struct channel *c); +void rt_dump_hooks(rtable *); +void rt_dump_hooks_all(void); void rt_prune_sync(rtable *t, int all); -int rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed); struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); - /* Default limit for ECMP next hops, defined in sysdep code */ extern const int rt_default_ecmp; @@ -379,6 +549,7 @@ struct rt_show_data { struct channel *export_channel; struct config *running_on_config; struct krt_proto *kernel; + struct rt_export_hook *kernel_export_hook; int export_mode, primary_only, filtered, stats, show_for; int table_open; /* Iteration (fit) is open */ @@ -430,30 +601,29 @@ struct nexthop { struct rte_src { struct rte_src *next; /* Hash chain */ - struct proto *proto; /* Protocol the source is based on */ + struct rte_owner *owner; 
/* Route source owner */ u32 private_id; /* Private ID, assigned by the protocol */ u32 global_id; /* Globally unique ID of the source */ - unsigned uc; /* Use count */ + _Atomic u64 uc; /* Use count */ }; typedef struct rta { - struct rta *next, **pprev; /* Hash chain */ - u32 uc; /* Use count */ + struct rta * _Atomic next, * _Atomic *pprev; /* Hash chain */ + _Atomic u32 uc; /* Use count */ u32 hash_key; /* Hash over important fields */ struct ea_list *eattrs; /* Extended Attribute chain */ - struct rte_src *src; /* Route source that created the route */ struct hostentry *hostentry; /* Hostentry for recursive next-hops */ ip_addr from; /* Advertising router */ u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ - u8 source; /* Route source (RTS_...) */ - u8 scope; /* Route scope (SCOPE_... -- see ip.h) */ - u8 dest; /* Route destination type (RTD_...) */ - u8 aflags; + u16 cached:1; /* Are attributes cached? */ + u16 source:7; /* Route source (RTS_...) */ + u16 scope:4; /* Route scope (SCOPE_... -- see ip.h) */ + u16 dest:4; /* Route destination type (RTD_...) 
*/ + word pref; struct nexthop nh; /* Next hop */ } rta; -#define RTS_DUMMY 0 /* Dummy route to be removed soon */ #define RTS_STATIC 1 /* Normal static route */ #define RTS_INHERIT 2 /* Route inherited from kernel */ #define RTS_DEVICE 3 /* Device route */ @@ -471,11 +641,6 @@ typedef struct rta { #define RTS_PERF 15 /* Perf checker */ #define RTS_MAX 16 -#define RTC_UNICAST 0 -#define RTC_BROADCAST 1 -#define RTC_MULTICAST 2 -#define RTC_ANYCAST 3 /* IPv6 Anycast */ - #define RTD_NONE 0 /* Undefined next hop */ #define RTD_UNICAST 1 /* Next hop is neighbor router */ #define RTD_BLACKHOLE 2 /* Silently drop packets */ @@ -483,8 +648,6 @@ typedef struct rta { #define RTD_PROHIBIT 4 /* Administratively prohibited */ #define RTD_MAX 5 -#define RTAF_CACHED 1 /* This is a cached rta */ - #define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other protocol-specific metric is availabe */ @@ -508,8 +671,8 @@ typedef struct eattr { byte flags; /* Protocol-dependent flags */ byte type; /* Attribute type and several flags (EAF_...) */ union { - u32 data; - const struct adata *ptr; /* Attribute data elsewhere */ + uintptr_t data; + const struct adata *ptr; /* Attribute data elsewhere */ } u; } eattr; @@ -517,7 +680,6 @@ typedef struct eattr { #define EA_CODE(proto,id) (((proto) << 8) | (id)) #define EA_ID(ea) ((ea) & 0xff) #define EA_PROTO(ea) ((ea) >> 8) -#define EA_ID_FLAG(ea) (1 << EA_ID(ea)) #define EA_CUSTOM(id) ((id) | EA_CUSTOM_BIT) #define EA_IS_CUSTOM(ea) ((ea) & EA_CUSTOM_BIT) #define EA_CUSTOM_ID(ea) ((ea) & ~EA_CUSTOM_BIT) @@ -540,6 +702,7 @@ const char *ea_custom_name(uint ea); #define EAF_TYPE_AS_PATH 0x06 /* BGP AS path (encoding per RFC 1771:4.3) */ #define EAF_TYPE_BITFIELD 0x09 /* 32-bit embedded bitfield */ #define EAF_TYPE_INT_SET 0x0a /* Set of u32's (e.g., a community list) */ +#define EAF_TYPE_PTR 0x0d /* Pointer to an object */ #define EAF_TYPE_EC_SET 0x0e /* Set of pairs of u32's - ext. 
community list */ #define EAF_TYPE_LC_SET 0x12 /* Set of triplets of u32's - large community list */ #define EAF_TYPE_UNDEF 0x1f /* `force undefined' entry */ @@ -578,13 +741,52 @@ typedef struct ea_list { #define EALF_SORTED 1 /* Attributes are sorted by code */ #define EALF_BISECT 2 /* Use interval bisection for searching */ #define EALF_CACHED 4 /* Attributes belonging to cached rta */ -#define EALF_TEMP 8 /* Temporary ea_list added by make_tmp_attrs hooks */ -struct rte_src *rt_find_source(struct proto *p, u32 id); -struct rte_src *rt_get_source(struct proto *p, u32 id); -static inline void rt_lock_source(struct rte_src *src) { src->uc++; } -static inline void rt_unlock_source(struct rte_src *src) { src->uc--; } -void rt_prune_sources(void); +struct rte_owner_class { + void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ + int (*rte_better)(struct rte *, struct rte *); + int (*rte_mergable)(struct rte *, struct rte *); + u32 (*rte_igp_metric)(struct rte *); +}; + +struct rte_owner { + struct rte_owner_class *class; + int (*rte_recalculate)(rtable_private *, struct network *, struct rte *, struct rte *, struct rte *); + HASH(struct rte_src) hash; + const char *name; + u32 hash_key; + u32 uc; + event_list *list; + event *prune; + event *stop; +}; + +#define RTE_SRC_PU_SHIFT 44 +#define RTE_SRC_IN_PROGRESS (1ULL << RTE_SRC_PU_SHIFT) + +struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id); +#define rt_get_source(p, id) rt_get_source_o(&(p)->sources, (id)) +static inline void rt_lock_source(struct rte_src *src) +{ + u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel); + ASSERT_DIE(uc > 0); +} + +static inline void rt_unlock_source(struct rte_src *src) +{ + u64 uc = atomic_fetch_add_explicit(&src->uc, RTE_SRC_IN_PROGRESS, memory_order_acq_rel); + u64 pending = uc >> RTE_SRC_PU_SHIFT; + uc &= RTE_SRC_IN_PROGRESS - 1; + + ASSERT_DIE(uc > pending); + if (uc == pending + 1) + 
ev_send(src->owner->list, src->owner->prune); + + atomic_fetch_sub_explicit(&src->uc, RTE_SRC_IN_PROGRESS + 1, memory_order_acq_rel); +} + +void rt_init_sources(struct rte_owner *, const char *name, event_list *list); +void rt_destroy_sources(struct rte_owner *, event *); struct ea_walk_state { ea_list *eattrs; /* Ccurrent ea_list, initially set by caller */ @@ -594,7 +796,7 @@ struct ea_walk_state { eattr *ea_find(ea_list *, unsigned ea); eattr *ea_walk(struct ea_walk_state *s, uint id, uint max); -int ea_get_int(ea_list *, unsigned ea, int def); +uintptr_t ea_get_int(ea_list *, unsigned ea, uintptr_t def); void ea_dump(ea_list *); void ea_sort(ea_list *); /* Sort entries in all sub-lists */ unsigned ea_scan(ea_list *); /* How many bytes do we need for merged ea_list */ @@ -673,24 +875,39 @@ void rta_init(void); static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; } #define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ -static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; } -static inline rta *rta_clone(rta *r) { r->uc++; return r; } +static inline int rta_is_cached(rta *r) { return r->cached; } + +static inline rta *rta_clone(rta *r) { + u32 uc = atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel); + ASSERT_DIE(uc > 0); + return r; +} + void rta__free(rta *r); -static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } +static inline void rta_free(rta *r) { + if (!r) + return; + + u32 uc = atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel); + if (uc == 1) + rta__free(r); +} + rta *rta_do_cow(rta *o, linpool *lp); static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? 
rta_do_cow(r, lp) : r; } -void rta_dump(rta *); +static inline void rta_uncache(rta *r) { r->cached = 0; r->uc = 0; } +void rta_dump(const rta *); void rta_dump_all(void); -void rta_show(struct cli *, rta *); +void rta_show(struct cli *, const rta *); -u32 rt_get_igp_metric(rte *rt); +u32 rt_get_igp_metric(rte *); struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); -void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls); +void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls, linpool *lp); static inline void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) +rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls, linpool *lp) { - rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls); + rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls, lp); } /* |