summaryrefslogtreecommitdiff
path: root/proto/bgp/bgp.h
diff options
context:
space:
mode:
Diffstat (limited to 'proto/bgp/bgp.h')
-rw-r--r--proto/bgp/bgp.h183
1 files changed, 103 insertions, 80 deletions
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 60f93bce..b76ffe99 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -14,13 +14,12 @@
#include <stdint.h>
#include <setjmp.h>
#include "nest/bird.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/bfd.h"
//#include "lib/lists.h"
#include "lib/hash.h"
#include "lib/socket.h"
-struct linpool;
struct eattr;
@@ -68,10 +67,10 @@ struct bgp_af_desc {
u8 no_igp;
const char *name;
uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size);
- void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
+ void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a);
void (*update_next_hop)(struct bgp_export_state *s, eattr *nh, ea_list **to);
uint (*encode_next_hop)(struct bgp_write_state *s, eattr *nh, byte *buf, uint size);
- void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
+ void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, ea_list **to);
};
@@ -86,6 +85,7 @@ struct bgp_config {
int peer_type; /* Internal or external BGP (BGP_PT_*, optional) */
int multihop; /* Number of hops if multihop */
int strict_bind; /* Bind listening socket to local address */
+ int free_bind; /* Bind listening socket with SKF_FREEBIND */
int ttl_security; /* Enable TTL security [RFC 5082] */
int compare_path_lengths; /* Use path lengths when selecting best route */
int med_metric; /* Compare MULTI_EXIT_DISC even between routes from differen ASes */
@@ -113,6 +113,8 @@ struct bgp_config {
int gr_mode; /* Graceful restart mode (BGP_GR_*) */
int llgr_mode; /* Long-lived graceful restart mode (BGP_LLGR_*) */
int setkey; /* Set MD5 password to system SA/SP database */
+ u8 local_role; /* Set peering role with neighbor [RFC 9234] */
+ int require_roles; /* Require configured roles on both sides */
/* Times below are in seconds */
unsigned gr_time; /* Graceful restart timeout */
unsigned llgr_time; /* Long-lived graceful restart stale time */
@@ -146,6 +148,7 @@ struct bgp_channel_config {
u8 mandatory; /* Channel is mandatory in capability negotiation */
u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */
u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
+ u8 validate; /* Validate Flowspec per RFC 8955 (6) */
u8 gr_able; /* Allow full graceful restart for the channel */
u8 llgr_able; /* Allow full long-lived GR for the channel */
uint llgr_time; /* Long-lived graceful restart stale time */
@@ -155,15 +158,23 @@ struct bgp_channel_config {
u8 aigp_originate; /* AIGP is originated automatically */
u32 cost; /* IGP cost for direct next hops */
u8 import_table; /* Use c.in_table as Adj-RIB-In */
- u8 export_table; /* Use c.out_table as Adj-RIB-Out */
+ u8 export_table; /* Keep Adj-RIB-Out and export it */
struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
+ struct rtable_config *base_table; /* Base table for Flowspec validation */
};
#define BGP_PT_INTERNAL 1
#define BGP_PT_EXTERNAL 2
+#define BGP_ROLE_UNDEFINED 255
+#define BGP_ROLE_PROVIDER 0
+#define BGP_ROLE_RS_SERVER 1
+#define BGP_ROLE_RS_CLIENT 2
+#define BGP_ROLE_CUSTOMER 3
+#define BGP_ROLE_PEER 4
+
#define NH_NO 0
#define NH_ALL 1
#define NH_IBGP 2
@@ -224,6 +235,7 @@ struct bgp_caps {
u8 ext_messages; /* Extended message length, RFC draft */
u8 route_refresh; /* Route refresh capability, RFC 2918 */
u8 enhanced_refresh; /* Enhanced route refresh, RFC 7313 */
+ u8 role; /* BGP role capability, RFC 9234 */
u8 gr_aware; /* Graceful restart capability, RFC 4724 */
u8 gr_flags; /* Graceful restart flags */
@@ -317,6 +329,7 @@ struct bgp_proto {
struct bgp_socket *sock; /* Shared listening socket */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */
+ event *uncork_ev; /* Uncork event in case of congestion */
struct bgp_stats stats; /* BGP statistics */
btime last_established; /* Last time of enter/leave of established state */
btime last_rx_update; /* Last time of RX update */
@@ -344,15 +357,12 @@ struct bgp_channel {
rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
+ rtable *base_table; /* Base table for Flowspec validation */
/* Rest are zeroed when down */
pool *pool;
- HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
- struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
- list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
-
- HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
- slab *prefix_slab; /* Slab holding prefix nodes */
+ struct bgp_pending_tx *ptx; /* Routes waiting to be sent */
+ struct rt_exporter prefix_exporter; /* Table-like exporter for ptx */
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
ip_addr link_addr; /* Link-local version of next_hop_addr */
@@ -373,11 +383,16 @@ struct bgp_channel {
u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */
u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */
+
+ u8 feed_out_table; /* Refeed into out_table */
};
struct bgp_prefix {
- node buck_node; /* Node in per-bucket list */
+ node buck_node_xx; /* Node in per-bucket list */
struct bgp_prefix *next; /* Node in prefix hash table */
+ struct bgp_bucket *last; /* Last bucket sent with this prefix */
+ struct bgp_bucket *cur; /* Current bucket (cur == last) if no update is required */
+ btime lastmod; /* Last modification of this prefix */
u32 hash;
u32 path_id;
net_addr net[0];
@@ -386,11 +401,24 @@ struct bgp_prefix {
struct bgp_bucket {
node send_node; /* Node in send queue */
struct bgp_bucket *next; /* Node in bucket hash table */
- list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */
+ list prefixes; /* Prefixes to send in this bucket (struct bgp_prefix) */
u32 hash; /* Hash over extended attributes */
+ u32 px_uc; /* How many prefixes are linking this bucket */
ea_list eattrs[0]; /* Per-bucket extended attributes */
};
+struct bgp_pending_tx {
+ resource r;
+ pool *pool;
+
+ HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
+ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
+ list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
+
+ HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
+ slab *prefix_slab; /* Slab holding prefix nodes */
+};
+
struct bgp_export_state {
struct bgp_proto *proto;
struct bgp_channel *channel;
@@ -401,7 +429,7 @@ struct bgp_export_state {
int mpls;
u32 attrs_seen[1];
- uint err_withdraw;
+ uint err_reject;
uint local_next_hop;
};
@@ -427,6 +455,7 @@ struct bgp_parse_state {
int as4_session;
int add_path;
int mpls;
+ int reach_nlri_step;
u32 attrs_seen[256/32];
@@ -453,13 +482,12 @@ struct bgp_parse_state {
uint err_subcode;
jmp_buf err_jmpbuf;
- struct hostentry *hostentry;
adata *mpls_labels;
/* Cached state for bgp_rte_update() */
u32 last_id;
struct rte_src *last_src;
- rta *cached_rta;
+ ea_list *cached_ea;
};
#define BGP_PORT 179
@@ -484,6 +512,12 @@ static inline int bgp_cc_is_ipv4(struct bgp_channel_config *c)
static inline int bgp_cc_is_ipv6(struct bgp_channel_config *c)
{ return BGP_AFI(c->afi) == BGP_AFI_IPV6; }
+static inline int bgp_channel_is_role_applicable(struct bgp_channel *c)
+{ return (c->afi == BGP_AF_IPV4 || c->afi == BGP_AF_IPV6); }
+
+static inline int bgp_cc_is_role_applicable(struct bgp_channel_config *c)
+{ return (c->afi == BGP_AF_IPV4 || c->afi == BGP_AF_IPV6); }
+
static inline uint bgp_max_packet_length(struct bgp_conn *conn)
{ return conn->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; }
@@ -494,9 +528,6 @@ bgp_parse_error(struct bgp_parse_state *s, uint subcode)
longjmp(s->err_jmpbuf, 1);
}
-extern struct linpool *bgp_linpool;
-extern struct linpool *bgp_linpool2;
-
void bgp_start_timer(timer *t, uint value);
void bgp_check_config(struct bgp_config *c);
@@ -518,9 +549,14 @@ struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
static inline int
-rta_resolvable(rta *a)
+rte_resolvable(const rte *rt)
{
- return a->dest == RTD_UNICAST;
+ eattr *nhea = ea_find(rt->attrs, &ea_gen_nexthop);
+ if (!nhea)
+ return 0;
+
+ struct nexthop_adata *nhad = (void *) nhea->u.ptr;
+ return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE);
}
extern struct rte_owner_class bgp_rte_owner_class;
@@ -539,61 +575,39 @@ extern struct rte_owner_class bgp_rte_owner_class;
/* attrs.c */
-static inline eattr *
-bgp_find_attr(ea_list *attrs, uint code)
-{
- return ea_find(attrs, EA_CODE(PROTOCOL_BGP, code));
-}
-
eattr *
-bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val);
-
-static inline void
-bgp_set_attr_u32(ea_list **to, struct linpool *pool, uint code, uint flags, u32 val)
-{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); }
-
-static inline void
-bgp_set_attr_ptr(ea_list **to, struct linpool *pool, uint code, uint flags, const struct adata *val)
-{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); }
+bgp_find_attr(ea_list *attrs, uint code);
-static inline void
-bgp_set_attr_data(ea_list **to, struct linpool *pool, uint code, uint flags, void *data, uint len)
-{
- struct adata *a = lp_alloc_adata(pool, len);
- bmemcpy(a->data, data, len);
- bgp_set_attr(to, pool, code, flags, (uintptr_t) a);
-}
-
-static inline void
-bgp_unset_attr(ea_list **to, struct linpool *pool, uint code)
-{ eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; }
+void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val);
+void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad);
+void bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len);
+void bgp_unset_attr(ea_list **to, uint code);
int bgp_encode_mp_reach_mrt(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end);
ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len);
-void bgp_finish_attrs(struct bgp_parse_state *s, rta *a);
+void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to);
+
+void bgp_setup_out_table(struct bgp_channel *c);
+
+void bgp_init_pending_tx(struct bgp_channel *c);
+void bgp_free_pending_tx(struct bgp_channel *c);
-void bgp_init_bucket_table(struct bgp_channel *c);
-void bgp_free_bucket_table(struct bgp_channel *c);
-void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b);
-void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b);
void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b);
+int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b);
-void bgp_init_prefix_table(struct bgp_channel *c);
-void bgp_free_prefix_table(struct bgp_channel *c);
-void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp);
+void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck);
int bgp_rte_better(struct rte *, struct rte *);
int bgp_rte_mergable(rte *pri, rte *sec);
-int bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best);
-void bgp_rte_modify_stale(struct rt_export_request *, const net_addr *, struct rt_pending_export *, rte **, uint);
-u32 bgp_rte_igp_metric(struct rte *);
+int bgp_rte_recalculate(struct rtable_private *table, net *net, rte *new, rte *old, rte *old_best);
+void bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count);
+u32 bgp_rte_igp_metric(const rte *);
void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old);
int bgp_preexport(struct channel *, struct rte *);
-int bgp_get_attr(const struct eattr *e, byte *buf, int buflen);
void bgp_get_route_info(struct rte *, byte *);
-int bgp_total_aigp_metric_(rta *a, u64 *metric, const struct adata **ad);
+int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad);
static inline struct bgp_proto *bgp_rte_proto(struct rte *rte)
{
@@ -605,15 +619,17 @@ static inline struct bgp_proto *bgp_rte_proto(struct rte *rte)
#define BGP_AIGP_MAX U64(0xffffffffffffffff)
static inline u64
-bgp_total_aigp_metric(rta *a)
+bgp_total_aigp_metric(const rte *e)
{
u64 metric = BGP_AIGP_MAX;
const struct adata *ad;
- bgp_total_aigp_metric_(a, &metric, &ad);
+ bgp_total_aigp_metric_(e, &metric, &ad);
return metric;
}
+void bgp_register_attrs(void);
+
/* packets.c */
@@ -625,6 +641,7 @@ void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
void bgp_kick_tx(void *vconn);
void bgp_tx(struct birdsock *sk);
int bgp_rx(struct birdsock *sk, uint size);
+void bgp_uncork(void *vp);
const char * bgp_error_dsc(unsigned code, unsigned subcode);
void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len);
@@ -650,26 +667,32 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
#define BAF_DECODE_FLAGS 0x0100 /* Private flag - attribute flags are handled by the decode hook */
-#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */
-#define BA_AS_PATH 0x02 /* WM */
-#define BA_NEXT_HOP 0x03 /* WM */
-#define BA_MULTI_EXIT_DISC 0x04 /* ON */
-#define BA_LOCAL_PREF 0x05 /* WD */
-#define BA_ATOMIC_AGGR 0x06 /* WD */
-#define BA_AGGREGATOR 0x07 /* OT */
-#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */
-#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */
-#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */
-#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */
-#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */
-#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */
-#define BA_AS4_PATH 0x11 /* RFC 6793 */
-#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */
-#define BA_AIGP 0x1a /* RFC 7311 */
-#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */
+enum bgp_attr_id {
+ BA_ORIGIN = 0x01, /* RFC 4271 */ /* WM */
+ BA_AS_PATH = 0x02, /* WM */
+ BA_NEXT_HOP = 0x03, /* WM */
+ BA_MULTI_EXIT_DISC = 0x04, /* ON */
+ BA_LOCAL_PREF = 0x05, /* WD */
+ BA_ATOMIC_AGGR = 0x06, /* WD */
+ BA_AGGREGATOR = 0x07, /* OT */
+ BA_COMMUNITY = 0x08, /* RFC 1997 */ /* OT */
+ BA_ORIGINATOR_ID = 0x09, /* RFC 4456 */ /* ON */
+ BA_CLUSTER_LIST = 0x0a, /* RFC 4456 */ /* ON */
+ BA_MP_REACH_NLRI = 0x0e, /* RFC 4760 */
+ BA_MP_UNREACH_NLRI = 0x0f, /* RFC 4760 */
+ BA_EXT_COMMUNITY = 0x10, /* RFC 4360 */
+ BA_AS4_PATH = 0x11, /* RFC 6793 */
+ BA_AS4_AGGREGATOR = 0x12, /* RFC 6793 */
+ BA_AIGP = 0x1a, /* RFC 7311 */
+ BA_LARGE_COMMUNITY = 0x20, /* RFC 8092 */
+#define BA_ONLY_TO_CUSTOMER 0x23 /* RFC 9234 */
/* Bird's private internal BGP attributes */
-#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */
+ BA_MPLS_LABEL_STACK = 0x100, /* MPLS label stack transfer attribute */
+
+/* Maximum */
+ BGP_ATTR_MAX,
+};
/* BGP connection states */