summary | refs | log | tree | commit | diff
path: root/proto
diff options
context:
space:
mode:
author Maria Matejka <mq@ucw.cz> 2022-10-04 15:59:15 +0200
committer Maria Matejka <mq@ucw.cz> 2022-10-04 15:59:15 +0200
commit 0eba27c69fbbd3cd701debc1f1254b51d1906388 (patch)
tree dc2ef1413c67dffa09f6c01e3ca27e59267917bb /proto
parent 4e1c582caddb6876b9fcbdc29b8bad5ab0b09ee7 (diff)
parent a32cee7813dc4e69f480d1b5fa574a159aded5f7 (diff)
Merge commit 'a32cee78' into HEAD
Diffstat (limited to 'proto')
-rw-r--r-- proto/bgp/attrs.c 205
-rw-r--r-- proto/bgp/bgp.c 37
-rw-r--r-- proto/bgp/bgp.h 46
-rw-r--r-- proto/bgp/config.Y 16
-rw-r--r-- proto/bgp/packets.c 56
5 files changed, 275 insertions, 85 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index e96b175d..2543ee73 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -939,6 +939,18 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla
bgp_set_attr_ptr(to, BA_LARGE_COMMUNITY, flags, ad);
}
+
+/*
+ * Decode the Only-to-Customer (OTC) path attribute, RFC 9234.
+ *
+ * @s:     parse state (referenced by the WITHDRAW() macro)
+ * @flags: attribute flags as received, stored together with the value
+ * @data:  attribute payload
+ * @len:   payload length in bytes; must be exactly 4
+ * @to:    attribute list receiving BA_ONLY_TO_CUSTOMER
+ *
+ * A payload of any other length is reported through
+ * WITHDRAW(BAD_LENGTH, ...). NOTE(review): WITHDRAW() is defined outside
+ * this hunk and is expected to leave the function -- confirm there.
+ */
+static void
+bgp_decode_otc(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
+{
+  if (len != 4)
+    WITHDRAW(BAD_LENGTH, "OTC", len);
+
+  /* The payload is a single 32-bit value */
+  u32 val = get_u32(data);
+  bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, flags, val);
+}
+
+
static void
bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
{
@@ -1175,6 +1187,13 @@ static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = {
.encode = bgp_encode_u32s,
.decode = bgp_decode_large_community,
},
+ [BA_ONLY_TO_CUSTOMER] = {
+ .name = "otc",
+ .type = T_INT,
+ .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
+ .encode = bgp_encode_u32,
+ .decode = bgp_decode_otc,
+ },
[BA_MPLS_LABEL_STACK] = {
.name = "bgp_mpls_label_stack",
.type = T_CLIST,
@@ -1504,6 +1523,29 @@ bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to)
REPORT("Discarding AIGP attribute received on non-AIGP session");
bgp_unset_attr(to, BA_AIGP);
}
+
+ /* Handle OTC ingress procedure, RFC 9234 */
+ if (bgp_channel_is_role_applicable(s->channel))
+ {
+ struct bgp_proto *p = s->proto;
+ eattr *e = bgp_find_attr(*to, BA_ONLY_TO_CUSTOMER);
+
+ /* Reject routes from downstream if they are leaked */
+ if (e && (p->cf->local_role == BGP_ROLE_PROVIDER ||
+ p->cf->local_role == BGP_ROLE_RS_SERVER))
+ WITHDRAW("Route leak detected - OTC attribute from downstream");
+
+ /* Reject routes from peers if they are leaked */
+ if (e && (p->cf->local_role == BGP_ROLE_PEER) && (e->u.data != p->cf->remote_as))
+ WITHDRAW("Route leak detected - OTC attribute with mismatched ASN (%u)",
+ (uint) e->u.data);
+
+ /* Mark routes from upstream if that has not happened before */
+ if (!e && (p->cf->local_role == BGP_ROLE_CUSTOMER ||
+ p->cf->local_role == BGP_ROLE_PEER ||
+ p->cf->local_role == BGP_ROLE_RS_CLIENT))
+ bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, 0, p->cf->remote_as);
+ }
}
@@ -1522,8 +1564,8 @@ bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to)
HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
-void
-bgp_init_bucket_table(struct bgp_channel *c)
+static void
+bgp_init_bucket_table(struct bgp_pending_tx *c)
{
HASH_INIT(c->bucket_hash, c->pool, 8);
@@ -1531,24 +1573,8 @@ bgp_init_bucket_table(struct bgp_channel *c)
c->withdraw_bucket = NULL;
}
-void
-bgp_free_bucket_table(struct bgp_channel *c)
-{
- HASH_FREE(c->bucket_hash);
-
- struct bgp_bucket *b;
- WALK_LIST_FIRST(b, c->bucket_queue)
- {
- rem_node(&b->send_node);
- mb_free(b);
- }
-
- mb_free(c->withdraw_bucket);
- c->withdraw_bucket = NULL;
-}
-
static struct bgp_bucket *
-bgp_get_bucket(struct bgp_channel *c, ea_list *new)
+bgp_get_bucket(struct bgp_pending_tx *c, ea_list *new)
{
/* Hash and lookup */
u32 hash = ea_hash(new);
@@ -1577,7 +1603,7 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new)
}
static struct bgp_bucket *
-bgp_get_withdraw_bucket(struct bgp_channel *c)
+bgp_get_withdraw_bucket(struct bgp_pending_tx *c)
{
if (!c->withdraw_bucket)
{
@@ -1589,15 +1615,17 @@ bgp_get_withdraw_bucket(struct bgp_channel *c)
}
static void
-bgp_free_bucket_xx(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_free_bucket(struct bgp_pending_tx *c, struct bgp_bucket *b)
{
HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
mb_free(b);
}
int
-bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_done_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
+ struct bgp_pending_tx *c = bc->ptx;
+
/* Won't free the withdraw bucket */
if (b == c->withdraw_bucket)
return 0;
@@ -1608,21 +1636,23 @@ bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b)
if (b->px_uc || !EMPTY_LIST(b->prefixes))
return 0;
- bgp_free_bucket_xx(c, b);
+ bgp_free_bucket(c, b);
return 1;
}
void
-bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_defer_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
+ struct bgp_pending_tx *c = bc->ptx;
rem_node(&b->send_node);
add_tail(&c->bucket_queue, &b->send_node);
}
void
-bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_withdraw_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
- struct bgp_proto *p = (void *) c->c.proto;
+ struct bgp_proto *p = (void *) bc->c.proto;
+ struct bgp_pending_tx *c = bc->ptx;
struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
log(L_ERR "%s: Attribute list too long", p->p.name);
@@ -1643,7 +1673,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
#define PXH_KEY(px) px->net, px->path_id, px->hash
#define PXH_NEXT(px) px->next
-#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (c->add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2)
+#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2)
#define PXH_FN(n,i,h) h
#define PXH_REHASH bgp_pxh_rehash
@@ -1652,29 +1682,21 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
-void
-bgp_init_prefix_table(struct bgp_channel *c)
+static void
+bgp_init_prefix_table(struct bgp_channel *bc)
{
+ struct bgp_pending_tx *c = bc->ptx;
HASH_INIT(c->prefix_hash, c->pool, 8);
- uint alen = net_addr_length[c->c.net_type];
+ uint alen = net_addr_length[bc->c.net_type];
c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
}
-void
-bgp_free_prefix_table(struct bgp_channel *c)
-{
- HASH_FREE(c->prefix_hash);
-
- rfree(c->prefix_slab);
- c->prefix_slab = NULL;
-}
-
static struct bgp_prefix *
-bgp_get_prefix(struct bgp_channel *c, const net_addr *net, struct rte_src *src)
+bgp_get_prefix(struct bgp_pending_tx *c, const net_addr *net, struct rte_src *src, int add_path_tx)
{
u32 path_id = src->global_id;
- u32 path_id_hash = c->add_path_tx ? path_id : 0;
+ u32 path_id_hash = add_path_tx ? path_id : 0;
/* We must use a different hash function than the rtable */
u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash));
struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash);
@@ -1698,15 +1720,16 @@ bgp_get_prefix(struct bgp_channel *c, const net_addr *net, struct rte_src *src)
return px;
}
-static void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px);
+static void bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px);
static inline int
bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b)
{
+#define IS_WITHDRAW_BUCKET(b) ((b) == c->ptx->withdraw_bucket)
#define BPX_TRACE(what) do { \
if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \
c->c.proto->name, c->c.name, what, \
- px->net, px->path_id, (b == c->withdraw_bucket) ? "withdraw" : "update"); } while (0)
+ px->net, px->path_id, IS_WITHDRAW_BUCKET(b) ? "withdraw" : "update"); } while (0)
px->lastmod = current_time();
/* Already queued for the same bucket */
@@ -1724,7 +1747,7 @@ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucke
}
/* The new bucket is the same as we sent before */
- if ((px->last == b) || c->c.out_table && !px->last && (b == c->withdraw_bucket))
+ if ((px->last == b) || c->c.out_table && !px->last && IS_WITHDRAW_BUCKET(b))
{
if (px->cur)
BPX_TRACE("reverted");
@@ -1733,15 +1756,15 @@ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucke
/* Well, we haven't sent anything yet */
if (!px->last)
- bgp_free_prefix(c, px);
+ bgp_free_prefix(c->ptx, px);
px->cur = NULL;
return 0;
}
/* Enqueue the bucket if it has been empty */
- if ((b != c->withdraw_bucket) && EMPTY_LIST(b->prefixes))
- add_tail(&c->bucket_queue, &b->send_node);
+ if (!IS_WITHDRAW_BUCKET(b) && EMPTY_LIST(b->prefixes))
+ add_tail(&c->ptx->bucket_queue, &b->send_node);
/* Enqueue to the new bucket and indicate the change */
add_tail(&b->prefixes, &px->buck_node_xx);
@@ -1754,7 +1777,7 @@ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucke
}
static void
-bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
+bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px)
{
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
@@ -1784,7 +1807,7 @@ bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket
px->last->px_uc--;
/* Ref the current sent version */
- if (buck != c->withdraw_bucket)
+ if (!IS_WITHDRAW_BUCKET(buck))
{
px->last = buck;
px->last->px_uc++;
@@ -1794,7 +1817,49 @@ bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket
/* Prefixes belonging to the withdraw bucket are freed always */
}
- bgp_free_prefix(c, px);
+ bgp_free_prefix(c->ptx, px);
+}
+
+/*
+ * Resource-class free hook for struct bgp_pending_tx.
+ *
+ * Walks every entry of the prefix hash and unlocks the route source found
+ * by the entry's path_id. NOTE(review): presumably this balances a source
+ * lock taken when the prefix entry was created -- that code is not visible
+ * in this hunk, confirm in bgp_get_prefix().
+ */
+static void
+bgp_pending_tx_rfree(resource *r)
+{
+ /* Recover the enclosing structure from its embedded resource header */
+ struct bgp_pending_tx *ptx = SKIP_BACK(struct bgp_pending_tx, r, r);
+
+ HASH_WALK(ptx->prefix_hash, next, n)
+ rt_unlock_source(rt_find_source_global(n->path_id));
+ HASH_WALK_END;
+}
+
+/* Resource-class dump hook; emits only a newline, no per-object details */
+static void bgp_pending_tx_dump(resource *r UNUSED) { debug("\n"); }
+
+/*
+ * Resource class describing struct bgp_pending_tx. Instances are obtained
+ * with ralloc() in bgp_init_pending_tx(); the free hook releases the route
+ * source locks referenced from the prefix hash.
+ */
+static struct resclass bgp_pending_tx_class = {
+ .name = "BGP Pending TX",
+ .size = sizeof(struct bgp_pending_tx),
+ .free = bgp_pending_tx_rfree,
+ .dump = bgp_pending_tx_dump,
+};
+
+/*
+ * Create the pending-TX state of channel @c.
+ *
+ * The channel must not have such state yet (asserted). A dedicated sub-pool
+ * of c->pool is created, the bgp_pending_tx resource is allocated from it,
+ * and the bucket and prefix tables are then initialized.
+ */
+void
+bgp_init_pending_tx(struct bgp_channel *c)
+{
+ ASSERT_DIE(!c->ptx);
+
+ /* Everything belonging to the pending TX lives in its own pool */
+ pool *p = rp_new(c->pool, "BGP Pending TX");
+ c->ptx = ralloc(p, &bgp_pending_tx_class);
+ c->ptx->pool = p;
+
+ /* The prefix table takes the channel because it reads c->c.net_type */
+ bgp_init_bucket_table(c->ptx);
+ bgp_init_prefix_table(c);
+}
+
+/*
+ * Drop the pending-TX state of channel @c.
+ *
+ * Both the state and its pool must exist (asserted). The dedicated pool is
+ * freed and c->ptx is cleared, so bgp_init_pending_tx() may be called again.
+ * NOTE(review): the ptx resource itself was allocated from that pool, so it
+ * is presumably released together with it -- confirm against rfree().
+ */
+void
+bgp_free_pending_tx(struct bgp_channel *c)
+{
+ ASSERT_DIE(c->ptx);
+ ASSERT_DIE(c->ptx->pool);
+
+ rfree(c->ptx->pool);
+ c->ptx = NULL;
}
@@ -1806,7 +1871,8 @@ static void
bgp_out_table_feed(void *data)
{
struct rt_export_hook *hook = data;
- struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table);
+ struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table);
+ struct bgp_pending_tx *c = bc->ptx;
int max = 512;
@@ -1901,8 +1967,8 @@ bgp_out_table_feed(void *data)
static struct rt_export_hook *
bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req UNUSED)
{
- struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, re);
- pool *p = rp_new(c->c.proto->pool, "Export hook");
+ struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, re);
+ pool *p = rp_new(bc->c.proto->pool, "Export hook");
struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook));
hook->pool = p;
hook->event = ev_new_init(p, bgp_out_table_feed, hook);
@@ -1937,6 +2003,7 @@ bgp_preexport(struct channel *C, rte *e)
{
struct bgp_proto *p = (struct bgp_proto *) C->proto;
struct bgp_proto *src = bgp_rte_proto(e);
+ struct bgp_channel *c = (struct bgp_channel *) C;
/* Reject our routes */
if (src == p)
@@ -1976,11 +2043,11 @@ bgp_preexport(struct channel *C, rte *e)
}
/* Handle well-known communities, RFC 1997 */
- struct eattr *com;
+ struct eattr *a;
if (p->cf->interpret_communities &&
- (com = ea_find(e->attrs, BGP_EA_ID(BA_COMMUNITY))))
+ (a = bgp_find_attr(e->attrs, BA_COMMUNITY)))
{
- const struct adata *d = com->u.ptr;
+ const struct adata *d = a->u.ptr;
/* Do not export anywhere */
if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
@@ -1999,6 +2066,16 @@ bgp_preexport(struct channel *C, rte *e)
return -1;
}
+ /* Do not export routes marked with OTC to upstream, RFC 9234 */
+ if (bgp_channel_is_role_applicable(c))
+ {
+ a = bgp_find_attr(e->attrs, BA_ONLY_TO_CUSTOMER);
+ if (a && (p->cf->local_role==BGP_ROLE_CUSTOMER ||
+ p->cf->local_role==BGP_ROLE_PEER ||
+ p->cf->local_role==BGP_ROLE_RS_CLIENT))
+ return -1;
+ }
+
return 0;
}
@@ -2107,6 +2184,16 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
}
}
+ /* Mark routes for downstream with OTC, RFC 9234 */
+ if (bgp_channel_is_role_applicable(c))
+ {
+ a = bgp_find_attr(attrs, BA_ONLY_TO_CUSTOMER);
+ if (!a && (p->cf->local_role == BGP_ROLE_PROVIDER ||
+ p->cf->local_role == BGP_ROLE_PEER ||
+ p->cf->local_role == BGP_ROLE_RS_SERVER))
+ bgp_set_attr_u32(&attrs, BA_ONLY_TO_CUSTOMER, 0, p->public_as);
+ }
+
/*
* Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
* conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
@@ -2134,16 +2221,16 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c
log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n);
/* If attributes are invalid, we fail back to withdraw */
- buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
+ buck = attrs ? bgp_get_bucket(c->ptx, attrs) : bgp_get_withdraw_bucket(c->ptx);
path = new->src;
}
else
{
- buck = bgp_get_withdraw_bucket(c);
+ buck = bgp_get_withdraw_bucket(c->ptx);
path = old->src;
}
- if (bgp_update_prefix(c, bgp_get_prefix(c, n, path), buck))
+ if (bgp_update_prefix(c, bgp_get_prefix(c->ptx, n, path, c->add_path_tx), buck))
bgp_schedule_packet(p->conn, c, PKT_UPDATE);
}
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index d240112c..68c788ea 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -102,6 +102,7 @@
* RFC 8212 - Default EBGP Route Propagation Behavior without Policies
* RFC 8654 - Extended Message Support for BGP
* RFC 9117 - Revised Validation Procedure for BGP Flow Specifications
+ * RFC 9234 - Route Leak Prevention and Detection Using Roles
* draft-ietf-idr-ext-opt-param-07
* draft-uttaro-idr-bgp-persistence-04
* draft-walton-bgp-hostname-capability-02
@@ -518,6 +519,12 @@ bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
p->uncork_ev->data = NULL;
bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
+
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ if (c->ptx)
+ bgp_free_pending_tx(c);
+
ev_schedule(p->event);
}
@@ -787,10 +794,8 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
}
/* Reset bucket and prefix tables */
- bgp_free_bucket_table(c);
- bgp_free_prefix_table(c);
- bgp_init_bucket_table(c);
- bgp_init_prefix_table(c);
+ bgp_free_pending_tx(c);
+ bgp_init_pending_tx(c);
c->packets_to_send = 0;
}
@@ -1806,8 +1811,7 @@ bgp_channel_start(struct channel *C)
if (c->cf->export_table)
bgp_setup_out_table(c);
- bgp_init_bucket_table(c);
- bgp_init_prefix_table(c);
+ bgp_init_pending_tx(c);
c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
@@ -2017,6 +2021,15 @@ bgp_postconfig(struct proto_config *CF)
if (internal && cf->rs_client)
cf_error("Only external neighbor can be RS client");
+ if (internal && (cf->local_role != BGP_ROLE_UNDEFINED))
+ cf_error("Local role cannot be set on IBGP sessions");
+
+ if (interior && (cf->local_role != BGP_ROLE_UNDEFINED))
+ log(L_WARN "BGP roles are not recommended to be used within AS confederations");
+
+ if (cf->require_roles && (cf->local_role == BGP_ROLE_UNDEFINED))
+ cf_error("Local role must be set if roles are required");
+
if (!cf->confederation && cf->confederation_member)
cf_error("Confederation ID must be set for member sessions");
@@ -2379,6 +2392,15 @@ bgp_show_afis(int code, char *s, u32 *afis, uint count)
cli_msg(code, b.start);
}
+/*
+ * Return a printable name for a BGP role code (BGP_ROLE_*).
+ *
+ * BGP_ROLE_UNDEFINED yields "undefined"; any code without a known name
+ * yields "?". The returned string has static storage.
+ */
+static const char *
+bgp_format_role_name(u8 role)
+{
+  /* Indexed by role code; designated initializers tie each name to its constant */
+  static const char * const bgp_role_names[] = {
+    [BGP_ROLE_PROVIDER]  = "provider",
+    [BGP_ROLE_RS_SERVER] = "rs_server",
+    [BGP_ROLE_RS_CLIENT] = "rs_client",
+    [BGP_ROLE_CUSTOMER]  = "customer",
+    [BGP_ROLE_PEER]      = "peer",
+  };
+
+  if (role == BGP_ROLE_UNDEFINED) return "undefined";
+
+  /* Guard against both out-of-range codes and unnamed gaps in the table */
+  if ((role < ARRAY_SIZE(bgp_role_names)) && bgp_role_names[role])
+    return bgp_role_names[role];
+
+  return "?";
+}
+
static void
bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
{
@@ -2507,6 +2529,9 @@ bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
if (caps->hostname)
cli_msg(-1006, " Hostname: %s", caps->hostname);
+
+ if (caps->role != BGP_ROLE_UNDEFINED)
+ cli_msg(-1006, " Role: %s", bgp_format_role_name(caps->role));
}
static void
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 81382099..0cd327a2 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -113,6 +113,8 @@ struct bgp_config {
int gr_mode; /* Graceful restart mode (BGP_GR_*) */
int llgr_mode; /* Long-lived graceful restart mode (BGP_LLGR_*) */
int setkey; /* Set MD5 password to system SA/SP database */
+ u8 local_role; /* Set peering role with neighbor [RFC 9234] */
+ int require_roles; /* Require configured roles on both sides */
/* Times below are in seconds */
unsigned gr_time; /* Graceful restart timeout */
unsigned llgr_time; /* Long-lived graceful restart stale time */
@@ -166,6 +168,13 @@ struct bgp_channel_config {
#define BGP_PT_INTERNAL 1
#define BGP_PT_EXTERNAL 2
+#define BGP_ROLE_UNDEFINED 255
+#define BGP_ROLE_PROVIDER 0
+#define BGP_ROLE_RS_SERVER 1
+#define BGP_ROLE_RS_CLIENT 2
+#define BGP_ROLE_CUSTOMER 3
+#define BGP_ROLE_PEER 4
+
#define NH_NO 0
#define NH_ALL 1
#define NH_IBGP 2
@@ -226,6 +235,7 @@ struct bgp_caps {
u8 ext_messages; /* Extended message length, RFC draft */
u8 route_refresh; /* Route refresh capability, RFC 2918 */
u8 enhanced_refresh; /* Enhanced route refresh, RFC 7313 */
+ u8 role; /* BGP role capability, RFC 9234 */
u8 gr_aware; /* Graceful restart capability, RFC 4724 */
u8 gr_flags; /* Graceful restart flags */
@@ -351,14 +361,8 @@ struct bgp_channel {
/* Rest are zeroed when down */
pool *pool;
- HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
- struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
- list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
-
- HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
- slab *prefix_slab; /* Slab holding prefix nodes */
-
- struct rt_exporter prefix_exporter; /* Table-like exporter for prefix_hash */
+ struct bgp_pending_tx *ptx; /* Routes waiting to be sent */
+ struct rt_exporter prefix_exporter; /* Table-like exporter for ptx */
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
ip_addr link_addr; /* Link-local version of next_hop_addr */
@@ -401,6 +405,18 @@ struct bgp_bucket {
ea_list eattrs[0]; /* Per-bucket extended attributes */
};
+/*
+ * Routes of one BGP channel waiting to be sent; referenced from
+ * struct bgp_channel through its ptx pointer.
+ */
+struct bgp_pending_tx {
+ resource r; /* Embedded resource header */
+ pool *pool; /* Pool used by the bucket and prefix tables */
+
+ HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
+ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
+ list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
+
+ HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
+ slab *prefix_slab; /* Slab holding prefix nodes */
+};
+
struct bgp_export_state {
struct bgp_proto *proto;
struct bgp_channel *channel;
@@ -494,6 +510,12 @@ static inline int bgp_cc_is_ipv4(struct bgp_channel_config *c)
static inline int bgp_cc_is_ipv6(struct bgp_channel_config *c)
{ return BGP_AFI(c->afi) == BGP_AFI_IPV6; }
+/*
+ * Nonzero when the RFC 9234 role/OTC procedures are applied on channel @c,
+ * i.e. when its afi is BGP_AF_IPV4 or BGP_AF_IPV6.
+ * NOTE(review): presumably these are the IPv4/IPv6 unicast families -- confirm.
+ */
+static inline int bgp_channel_is_role_applicable(struct bgp_channel *c)
+{ return (c->afi == BGP_AF_IPV4 || c->afi == BGP_AF_IPV6); }
+
+/*
+ * Channel-config counterpart of the role applicability test: nonzero when
+ * the configured afi is BGP_AF_IPV4 or BGP_AF_IPV6.
+ */
+static inline int bgp_cc_is_role_applicable(struct bgp_channel_config *c)
+{ return (c->afi == BGP_AF_IPV4 || c->afi == BGP_AF_IPV6); }
+
static inline uint bgp_max_packet_length(struct bgp_conn *conn)
{ return conn->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; }
@@ -567,13 +589,12 @@ void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to);
void bgp_setup_out_table(struct bgp_channel *c);
-void bgp_init_bucket_table(struct bgp_channel *c);
-void bgp_free_bucket_table(struct bgp_channel *c);
+void bgp_init_pending_tx(struct bgp_channel *c);
+void bgp_free_pending_tx(struct bgp_channel *c);
+
void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b);
int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b);
-void bgp_init_prefix_table(struct bgp_channel *c);
-void bgp_free_prefix_table(struct bgp_channel *c);
void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck);
int bgp_rte_better(struct rte *, struct rte *);
@@ -662,6 +683,7 @@ enum bgp_attr_id {
BA_AS4_AGGREGATOR = 0x12, /* RFC 6793 */
BA_AIGP = 0x1a, /* RFC 7311 */
BA_LARGE_COMMUNITY = 0x20, /* RFC 8092 */
+#define BA_ONLY_TO_CUSTOMER 0x23 /* RFC 9234 */
/* Bird's private internal BGP attributes */
BA_MPLS_LABEL_STACK = 0x100, /* MPLS label stack transfer attribute */
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 24f3ec8f..9f0d2306 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -30,7 +30,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS,
DYNAMIC, RANGE, NAME, DIGITS, AIGP, ORIGINATE, COST, ENFORCE,
- FIRST, FREE, VALIDATE, BASE)
+ FIRST, FREE, VALIDATE, BASE, ROLE, ROLES, PEER, PROVIDER, CUSTOMER,
+ RS_SERVER, RS_CLIENT, REQUIRE, BGP_OTC)
%type <i> bgp_nh
%type <i32> bgp_afi
@@ -39,7 +40,7 @@ CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER,
CONFIGURATION, CHANGE, DECONFIGURED, CONNECTION, REJECTED, COLLISION,
OUT, OF, RESOURCES)
-%type<i> bgp_cease_mask bgp_cease_list bgp_cease_flag
+%type<i> bgp_cease_mask bgp_cease_list bgp_cease_flag bgp_role_name
CF_GRAMMAR
@@ -73,6 +74,7 @@ bgp_proto_start: proto_start BGP {
BGP_CFG->llgr_mode = -1;
BGP_CFG->llgr_time = 3600;
BGP_CFG->setkey = 1;
+ BGP_CFG->local_role = BGP_ROLE_UNDEFINED;
BGP_CFG->dynamic_name = "dynbgp";
BGP_CFG->check_link = -1;
}
@@ -115,6 +117,14 @@ bgp_cease_flag:
| OUT OF RESOURCES { $$ = 1 << 8; }
;
+/* Maps a role keyword to its BGP_ROLE_* constant; used by the LOCAL ROLE option */
+bgp_role_name:
+ PEER { $$ = BGP_ROLE_PEER; }
+ | PROVIDER { $$ = BGP_ROLE_PROVIDER; }
+ | CUSTOMER { $$ = BGP_ROLE_CUSTOMER; }
+ | RS_SERVER { $$ = BGP_ROLE_RS_SERVER; }
+ | RS_CLIENT { $$ = BGP_ROLE_RS_CLIENT; }
+ ;
+
bgp_proto:
bgp_proto_start proto_name '{'
| bgp_proto proto_item ';'
@@ -198,6 +208,8 @@ bgp_proto:
| bgp_proto BFD GRACEFUL ';' { init_bfd_opts(&BGP_CFG->bfd); BGP_CFG->bfd->mode = BGP_BFD_GRACEFUL; }
| bgp_proto BFD { open_bfd_opts(&BGP_CFG->bfd); } bfd_opts { close_bfd_opts(); } ';'
| bgp_proto ENFORCE FIRST AS bool ';' { BGP_CFG->enforce_first_as = $5; }
+ | bgp_proto LOCAL ROLE bgp_role_name ';' { BGP_CFG->local_role = $4; }
+ | bgp_proto REQUIRE ROLES bool ';' { BGP_CFG->require_roles = $4; }
;
bgp_afi:
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 867be75f..d4d2d0b0 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -238,6 +238,7 @@ bgp_prepare_capabilities(struct bgp_conn *conn)
caps->ext_messages = p->cf->enable_extended_messages;
caps->route_refresh = p->cf->enable_refresh;
caps->enhanced_refresh = p->cf->enable_refresh;
+ caps->role = p->cf->local_role;
if (caps->as4_support)
caps->as4_number = p->public_as;
@@ -350,6 +351,13 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
*buf++ = 0; /* Capability data length */
}
+ if (caps->role != BGP_ROLE_UNDEFINED)
+ {
+ *buf++ = 9; /* Capability 9: Announce chosen BGP role */
+ *buf++ = 1; /* Capability data length */
+ *buf++ = caps->role;
+ }
+
if (caps->gr_aware)
{
*buf++ = 64; /* Capability 64: Support for graceful restart */
@@ -449,11 +457,15 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
struct bgp_proto *p = conn->bgp;
struct bgp_caps *caps;
struct bgp_af_caps *ac;
+ uint err_subcode = 0;
int i, cl;
u32 af;
if (!conn->remote_caps)
+ {
caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + sizeof(struct bgp_af_caps));
+ caps->role = BGP_ROLE_UNDEFINED;
+ }
else
{
caps = conn->remote_caps;
@@ -513,6 +525,21 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
caps->ext_messages = 1;
break;
+ case 9: /* BGP role capability, RFC 9234 */
+ if (cl != 1)
+ goto err;
+
+ /* Reserved value */
+ if (pos[2] == BGP_ROLE_UNDEFINED)
+ { err_subcode = 11; goto err; }
+
+ /* Multiple inconsistent values */
+ if ((caps->role != BGP_ROLE_UNDEFINED) && (caps->role != pos[2]))
+ { err_subcode = 11; goto err; }
+
+ caps->role = pos[2];
+ break;
+
case 64: /* Graceful restart capability, RFC 4724 */
if (cl % 4 != 2)
goto err;
@@ -638,7 +665,7 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
err:
mb_free(caps);
- bgp_error(conn, 2, 0, NULL, 0);
+ bgp_error(conn, 2, err_subcode, NULL, 0);
return -1;
}
@@ -854,6 +881,22 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
conn->received_as = asn;
}
+ /* RFC 9234 4.2 - check role agreement */
+ u8 local_role = p->cf->local_role;
+ u8 neigh_role = caps->role;
+
+ if ((local_role != BGP_ROLE_UNDEFINED) &&
+ (neigh_role != BGP_ROLE_UNDEFINED) &&
+ !((local_role == BGP_ROLE_PEER && neigh_role == BGP_ROLE_PEER) ||
+ (local_role == BGP_ROLE_CUSTOMER && neigh_role == BGP_ROLE_PROVIDER) ||
+ (local_role == BGP_ROLE_PROVIDER && neigh_role == BGP_ROLE_CUSTOMER) ||
+ (local_role == BGP_ROLE_RS_CLIENT && neigh_role == BGP_ROLE_RS_SERVER) ||
+ (local_role == BGP_ROLE_RS_SERVER && neigh_role == BGP_ROLE_RS_CLIENT)))
+ { bgp_error(conn, 2, 11, NULL, 0); return; }
+
+ if ((p->cf->require_roles) && (neigh_role == BGP_ROLE_UNDEFINED))
+ { bgp_error(conn, 2, 11, NULL, 0); return; }
+
/* Check the other connection */
other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
switch (other->state)
@@ -2169,7 +2212,7 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
* var IPv4 Network Layer Reachability Information
*/
- ASSERT_DIE(s->channel->withdraw_bucket != buck);
+ ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
int lr, la;
@@ -2192,7 +2235,7 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
static byte *
bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
- ASSERT_DIE(s->channel->withdraw_bucket != buck);
+ ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
/*
* 2 B IPv4 Withdrawn Routes Length (zero)
@@ -2332,7 +2375,7 @@ again: ;
};
/* Try unreachable bucket */
- if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
+ if ((buck = c->ptx->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
{
res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
bgp_create_ip_unreach(&s, buck, buf, end):
@@ -2342,9 +2385,9 @@ again: ;
}
/* Try reachable buckets */
- if (!EMPTY_LIST(c->bucket_queue))
+ if (!EMPTY_LIST(c->ptx->bucket_queue))
{
- buck = HEAD(c->bucket_queue);
+ buck = HEAD(c->ptx->bucket_queue);
/* Cleanup empty buckets */
if (bgp_done_bucket(c, buck))
@@ -2977,6 +3020,7 @@ static struct {
{ 2, 6, "Unacceptable hold time" },
{ 2, 7, "Required capability missing" }, /* [RFC5492] */
{ 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
+ { 2,11, "Role mismatch" }, /* From Open Policy, RFC 9234 */
{ 3, 0, "Invalid UPDATE message" },
{ 3, 1, "Malformed attribute list" },
{ 3, 2, "Unrecognized well-known attribute" },