summaryrefslogtreecommitdiff
path: root/proto/bgp
diff options
context:
space:
mode:
Diffstat (limited to 'proto/bgp')
-rw-r--r--proto/bgp/Makefile2
-rw-r--r--proto/bgp/attrs.c1217
-rw-r--r--proto/bgp/bgp.c682
-rw-r--r--proto/bgp/bgp.h233
-rw-r--r--proto/bgp/config.Y87
-rw-r--r--proto/bgp/packets.c449
6 files changed, 1713 insertions, 957 deletions
diff --git a/proto/bgp/Makefile b/proto/bgp/Makefile
index 00aaef5e..f6a38678 100644
--- a/proto/bgp/Makefile
+++ b/proto/bgp/Makefile
@@ -3,4 +3,4 @@ obj := $(src-o-files)
$(all-daemon)
$(cf-local)
-tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
+tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 02b07410..8bff4c78 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -15,12 +15,13 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
-#include "nest/attrs.h"
+#include "nest/rt.h"
+#include "lib/attrs.h"
#include "conf/conf.h"
#include "lib/resource.h"
#include "lib/string.h"
#include "lib/unaligned.h"
+#include "lib/macro.h"
#include "bgp.h"
@@ -45,9 +46,9 @@
*
* export - Hook that validates and normalizes attribute during export phase.
* Receives eattr, may modify it (e.g., sort community lists for canonical
- * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
- * necessary. May assume that eattr has value valid w.r.t. its type, but may be
- * invalid w.r.t. BGP constraints. Optional.
+ * representation), UNSET() it (e.g., skip empty lists), or REJECT() the route
+ * if necessary. May assume that eattr has value valid w.r.t. its type, but may
+ * be invalid w.r.t. BGP constraints. Optional.
*
* encode - Hook that converts internal representation to external one during
* packet writing. Receives eattr and puts it in the buffer (including attribute
@@ -64,37 +65,72 @@
* format - Optional hook that converts eattr to textual representation.
*/
-
-struct bgp_attr_desc {
- const char *name;
- uint type;
- uint flags;
- void (*export)(struct bgp_export_state *s, eattr *a);
- int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
- void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
- void (*format)(const eattr *ea, byte *buf, uint size);
+union bgp_attr_desc {
+ struct ea_class class;
+ struct {
+ EA_CLASS_INSIDE;
+ uint flags;
+ void (*export)(struct bgp_export_state *s, eattr *a);
+ int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
+ void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
+ };
};
-static const struct bgp_attr_desc bgp_attr_table[];
+static union bgp_attr_desc bgp_attr_table[];
+static inline const union bgp_attr_desc *bgp_find_attr_desc(eattr *a)
+{
+ const struct ea_class *class = ea_class_find(a->id);
-static inline int bgp_attr_known(uint code);
+ if ((class < &bgp_attr_table[0].class) || (class >= &bgp_attr_table[BGP_ATTR_MAX].class))
+ return NULL;
-eattr *
-bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
+ return (const union bgp_attr_desc *) class;
+}
+
+#define BGP_EA_ID(code) (bgp_attr_table[code].id)
+#define EA_BGP_ID(code) (((union bgp_attr_desc *) ea_class_find(code)) - bgp_attr_table)
+
+void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val)
{
- ASSERT(bgp_attr_known(code));
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
- return ea_set_attr(
- attrs,
- pool,
- EA_CODE(PROTOCOL_BGP, code),
- flags & ~BAF_EXT_LEN,
- bgp_attr_table[code].type,
- val
- );
+ ea_set_attr(to, EA_LITERAL_EMBEDDED(
+ &desc->class,
+ flags & ~BAF_EXT_LEN,
+ val
+ ));
}
+void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad)
+{
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
+ ea_set_attr(to, EA_LITERAL_DIRECT_ADATA(
+ &desc->class,
+ flags & ~BAF_EXT_LEN,
+ ad
+ ));
+}
+
+void
+bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len)
+{
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
+
+ ea_set_attr(to, EA_LITERAL_STORE_ADATA(
+ &desc->class,
+ flags & ~BAF_EXT_LEN,
+ data,
+ len
+ ));
+}
+
+void
+bgp_unset_attr(ea_list **to, uint code)
+{
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
+ ea_unset_attr(to, 0, &desc->class);
+}
#define REPORT(msg, args...) \
({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
@@ -106,7 +142,10 @@ bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintp
({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
#define UNSET(a) \
- ({ a->type = EAF_TYPE_UNDEF; return; })
+ ({ a->undef = 1; return; })
+
+#define REJECT(msg, args...) \
+ ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; })
#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
@@ -148,7 +187,7 @@ bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
if (size < (3+1))
return -1;
- bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
+ bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 1);
buf[3] = a->u.data;
return 3+1;
@@ -160,7 +199,7 @@ bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
if (size < (3+4))
return -1;
- bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
+ bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 4);
put_u32(buf+3, a->u.data);
return 3+4;
@@ -174,7 +213,7 @@ bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size
if (size < (4+len))
return -1;
- uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
+ uint hdr = bgp_put_attr_hdr(buf, EA_BGP_ID(a->id), a->flags, len);
put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
return hdr + len;
@@ -195,7 +234,7 @@ bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint
static int
bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
{
- return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
+ return bgp_put_attr(buf, size, EA_BGP_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
}
@@ -333,26 +372,26 @@ bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric)
}
int
-bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad)
+bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad)
{
- eattr *ea = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP));
- if (!ea)
+ eattr *a = ea_find(e->attrs, BGP_EA_ID(BA_AIGP));
+ if (!a)
return 0;
- const byte *b = bgp_aigp_get_tlv(ea->u.ptr, BGP_AIGP_METRIC);
+ const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
if (!b)
return 0;
u64 aigp = get_u64(b + 3);
- u64 step = a->igp_metric;
+ u64 step = rt_get_igp_metric(e);
- if (!rta_resolvable(a) || (step >= IGP_METRIC_UNKNOWN))
+ if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN))
step = BGP_AIGP_MAX;
if (!step)
step = 1;
- *ad = ea->u.ptr;
+ *ad = a->u.ptr;
*metric = aigp + step;
if (*metric < aigp)
*metric = BGP_AIGP_MAX;
@@ -363,7 +402,7 @@ bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad)
static inline int
bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
{
- if (e->attrs->source == RTS_BGP)
+ if (rt_get_source_attr(e) == RTS_BGP)
return 0;
*metric = rt_get_igp_metric(e);
@@ -372,9 +411,9 @@ bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
}
u32
-bgp_rte_igp_metric(struct rte *rt)
+bgp_rte_igp_metric(const rte *rt)
{
- u64 metric = bgp_total_aigp_metric(rt->attrs);
+ u64 metric = bgp_total_aigp_metric(rt);
return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN);
}
@@ -387,7 +426,7 @@ static void
bgp_export_origin(struct bgp_export_state *s, eattr *a)
{
if (a->u.data > 2)
- WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
+ REJECT(BAD_VALUE, "ORIGIN", a->u.data);
}
static void
@@ -399,7 +438,7 @@ bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte
if (data[0] > 2)
WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
- bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
+ bgp_set_attr_u32(to, BA_ORIGIN, flags, data[0]);
}
static void
@@ -467,7 +506,7 @@ bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte
!bgp_as_path_first_as_equal(data, len, p->remote_as))
WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS");
- bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
+ bgp_set_attr_data(to, BA_AS_PATH, flags, data, len);
}
@@ -539,7 +578,7 @@ bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *da
WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
u32 val = get_u32(data);
- bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
+ bgp_set_attr_u32(to, BA_MULTI_EXIT_DISC, flags, val);
}
@@ -560,7 +599,7 @@ bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, b
WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
u32 val = get_u32(data);
- bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
+ bgp_set_attr_u32(to, BA_LOCAL_PREF, flags, val);
}
@@ -570,7 +609,7 @@ bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags,
if (len != 0)
DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
- bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
+ bgp_set_attr_data(to, BA_ATOMIC_AGGR, flags, NULL, 0);
}
static int
@@ -604,7 +643,7 @@ bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, b
len = aggregator_16to32(data, src);
}
- bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
+ bgp_set_attr_data(to, BA_AGGREGATOR, flags, data, len);
}
static void
@@ -633,7 +672,7 @@ bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, by
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
+ bgp_set_attr_ptr(to, BA_COMMUNITY, flags, ad);
}
@@ -654,7 +693,7 @@ bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags
WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
u32 val = get_u32(data);
- bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
+ bgp_set_attr_u32(to, BA_ORIGINATOR_ID, flags, val);
}
@@ -679,7 +718,7 @@ bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags,
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
+ bgp_set_attr_ptr(to, BA_CLUSTER_LIST, flags, ad);
}
static void
@@ -798,7 +837,7 @@ bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
+ bgp_set_attr_ptr(to, BA_EXT_COMMUNITY, flags, ad);
}
@@ -811,7 +850,7 @@ bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flag
if (len != 8)
DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
- bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
+ bgp_set_attr_data(to, BA_AS4_AGGREGATOR, flags, data, len);
}
static void
@@ -841,7 +880,7 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt
a = as_path_strip_confed(s->pool, a);
}
- bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
+ bgp_set_attr_ptr(to, BA_AS4_PATH, flags, a);
}
@@ -865,7 +904,7 @@ bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *d
if (!bgp_aigp_valid(data, len, err, sizeof(err)))
DISCARD("Malformed AIGP attribute - %s", err);
- bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len);
+ bgp_set_attr_data(to, BA_AIGP, flags, data, len);
}
static void
@@ -897,9 +936,21 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
+ bgp_set_attr_ptr(to, BA_LARGE_COMMUNITY, flags, ad);
+}
+
+
+static void
+bgp_decode_otc(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
+{
+ if (len != 4)
+ WITHDRAW(BAD_LENGTH, "OTC", len);
+
+ u32 val = get_u32(data);
+ bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, flags, val);
}
+
static void
bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
{
@@ -909,20 +960,20 @@ bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
/* Perhaps we should just ignore it? */
if (!s->mpls)
- WITHDRAW("Unexpected MPLS stack");
+ REJECT("Unexpected MPLS stack");
/* Empty MPLS stack is not allowed */
if (!lnum)
- WITHDRAW("Malformed MPLS stack - empty");
+ REJECT("Malformed MPLS stack - empty");
/* This is ugly, but we must ensure that labels fit into NLRI field */
if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
- WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum);
+ REJECT("Malformed MPLS stack - too many labels (%u)", lnum);
for (uint i = 0; i < lnum; i++)
{
if (labels[i] > 0xfffff)
- WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]);
+ REJECT("Malformed MPLS stack - invalid label (%u)", labels[i]);
/* TODO: Check for special-purpose label values? */
}
@@ -970,10 +1021,29 @@ bgp_format_mpls_label_stack(const eattr *a, byte *buf, uint size)
}
static inline void
-bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
+bgp_export_unknown(struct bgp_export_state *s UNUSED, eattr *a)
{
+ if (!(a->flags & BAF_TRANSITIVE))
+ UNSET(a);
+
+ a->flags |= BAF_PARTIAL;
+}
+
+static inline void
+bgp_decode_unknown(struct bgp_parse_state *s UNUSED, uint code, uint flags, byte *data, uint len, ea_list **to)
+{
+ if (!(flags & BAF_OPTIONAL))
+ WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
+
/* Cannot use bgp_set_attr_data() as it works on known attributes only */
- ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
+ ea_set_attr_data(to, &bgp_attr_table[code].class, flags, data, len);
+}
+
+static inline void
+bgp_format_unknown(const eattr *a, byte *buf, uint size)
+{
+ if (a->flags & BAF_TRANSITIVE)
+ bsnprintf(buf, size, "(transitive)");
}
@@ -981,10 +1051,10 @@ bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data,
* Attribute table
*/
-static const struct bgp_attr_desc bgp_attr_table[] = {
+static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = {
[BA_ORIGIN] = {
- .name = "origin",
- .type = EAF_TYPE_INT,
+ .name = "bgp_origin",
+ .type = T_ENUM_BGP_ORIGIN,
.flags = BAF_TRANSITIVE,
.export = bgp_export_origin,
.encode = bgp_encode_u8,
@@ -992,69 +1062,69 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
.format = bgp_format_origin,
},
[BA_AS_PATH] = {
- .name = "as_path",
- .type = EAF_TYPE_AS_PATH,
+ .name = "bgp_path",
+ .type = T_PATH,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_as_path,
.decode = bgp_decode_as_path,
},
[BA_NEXT_HOP] = {
- .name = "next_hop",
- .type = EAF_TYPE_IP_ADDRESS,
+ .name = "bgp_next_hop",
+ .type = T_IP,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_next_hop,
.decode = bgp_decode_next_hop,
.format = bgp_format_next_hop,
},
[BA_MULTI_EXIT_DISC] = {
- .name = "med",
- .type = EAF_TYPE_INT,
+ .name = "bgp_med",
+ .type = T_INT,
.flags = BAF_OPTIONAL,
.encode = bgp_encode_u32,
.decode = bgp_decode_med,
},
[BA_LOCAL_PREF] = {
- .name = "local_pref",
- .type = EAF_TYPE_INT,
+ .name = "bgp_local_pref",
+ .type = T_INT,
.flags = BAF_TRANSITIVE,
.export = bgp_export_local_pref,
.encode = bgp_encode_u32,
.decode = bgp_decode_local_pref,
},
[BA_ATOMIC_AGGR] = {
- .name = "atomic_aggr",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_atomic_aggr",
+ .type = T_OPAQUE,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_atomic_aggr,
},
[BA_AGGREGATOR] = {
- .name = "aggregator",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_aggregator",
+ .type = T_OPAQUE,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_aggregator,
.decode = bgp_decode_aggregator,
.format = bgp_format_aggregator,
},
[BA_COMMUNITY] = {
- .name = "community",
- .type = EAF_TYPE_INT_SET,
+ .name = "bgp_community",
+ .type = T_CLIST,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_community,
.encode = bgp_encode_u32s,
.decode = bgp_decode_community,
},
[BA_ORIGINATOR_ID] = {
- .name = "originator_id",
- .type = EAF_TYPE_ROUTER_ID,
+ .name = "bgp_originator_id",
+ .type = T_QUAD,
.flags = BAF_OPTIONAL,
.export = bgp_export_originator_id,
.encode = bgp_encode_u32,
.decode = bgp_decode_originator_id,
},
[BA_CLUSTER_LIST] = {
- .name = "cluster_list",
- .type = EAF_TYPE_INT_SET,
+ .name = "bgp_cluster_list",
+ .type = T_CLIST,
.flags = BAF_OPTIONAL,
.export = bgp_export_cluster_list,
.encode = bgp_encode_u32s,
@@ -1062,43 +1132,47 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
.format = bgp_format_cluster_list,
},
[BA_MP_REACH_NLRI] = {
- .name = "mp_reach_nlri",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_mp_reach_nlri",
+ .type = T_OPAQUE,
+ .hidden = 1,
.flags = BAF_OPTIONAL,
.decode = bgp_decode_mp_reach_nlri,
},
[BA_MP_UNREACH_NLRI] = {
- .name = "mp_unreach_nlri",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_mp_unreach_nlri",
+ .type = T_OPAQUE,
+ .hidden = 1,
.flags = BAF_OPTIONAL,
.decode = bgp_decode_mp_unreach_nlri,
},
[BA_EXT_COMMUNITY] = {
- .name = "ext_community",
- .type = EAF_TYPE_EC_SET,
+ .name = "bgp_ext_community",
+ .type = T_ECLIST,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_ext_community,
.encode = bgp_encode_u32s,
.decode = bgp_decode_ext_community,
},
[BA_AS4_PATH] = {
- .name = "as4_path",
- .type = EAF_TYPE_AS_PATH,
+ .name = "bgp_as4_path",
+ .type = T_PATH,
+ .hidden = 1,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_as4_path,
},
[BA_AS4_AGGREGATOR] = {
- .name = "as4_aggregator",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_as4_aggregator",
+ .type = T_OPAQUE,
+ .hidden = 1,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_as4_aggregator,
.format = bgp_format_aggregator,
},
[BA_AIGP] = {
- .name = "aigp",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_aigp",
+ .type = T_OPAQUE,
.flags = BAF_OPTIONAL | BAF_DECODE_FLAGS,
.export = bgp_export_aigp,
.encode = bgp_encode_raw,
@@ -1106,16 +1180,24 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
.format = bgp_format_aigp,
},
[BA_LARGE_COMMUNITY] = {
- .name = "large_community",
- .type = EAF_TYPE_LC_SET,
+ .name = "bgp_large_community",
+ .type = T_LCLIST,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_large_community,
.encode = bgp_encode_u32s,
.decode = bgp_decode_large_community,
},
+ [BA_ONLY_TO_CUSTOMER] = {
+ .name = "otc",
+ .type = T_INT,
+ .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
+ .encode = bgp_encode_u32,
+ .decode = bgp_decode_otc,
+ },
[BA_MPLS_LABEL_STACK] = {
- .name = "mpls_label_stack",
- .type = EAF_TYPE_INT_SET,
+ .name = "bgp_mpls_label_stack",
+ .type = T_CLIST,
+ .readonly = 1,
.export = bgp_export_mpls_label_stack,
.encode = bgp_encode_mpls_label_stack,
.decode = bgp_decode_mpls_label_stack,
@@ -1123,12 +1205,32 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
},
};
-static inline int
-bgp_attr_known(uint code)
+eattr *
+bgp_find_attr(ea_list *attrs, uint code)
{
- return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
+ return ea_find(attrs, BGP_EA_ID(code));
}
+void
+bgp_register_attrs(void)
+{
+ for (uint i=0; i<ARRAY_SIZE(bgp_attr_table); i++)
+ {
+ if (!bgp_attr_table[i].name)
+ bgp_attr_table[i] = (union bgp_attr_desc) {
+ .name = mb_sprintf(&root_pool, "bgp_unknown_0x%02x", i),
+ .type = T_OPAQUE,
+ .flags = BAF_OPTIONAL,
+ .readonly = 1,
+ .export = bgp_export_unknown,
+ .encode = bgp_encode_raw,
+ .decode = bgp_decode_unknown,
+ .format = bgp_format_unknown,
+ };
+
+ ea_register_init(&bgp_attr_table[i].class);
+ }
+}
/*
* Attribute export
@@ -1137,38 +1239,24 @@ bgp_attr_known(uint code)
static inline void
bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
{
- if (EA_PROTO(a->id) != PROTOCOL_BGP)
+ const union bgp_attr_desc *desc = bgp_find_attr_desc(a);
+ if (!desc)
return;
- uint code = EA_ID(a->id);
+ /* The flags might have been zero if the attr was added locally */
+ a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
- if (bgp_attr_known(code))
- {
- const struct bgp_attr_desc *desc = &bgp_attr_table[code];
-
- /* The flags might have been zero if the attr was added by filters */
- a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
-
- /* Set partial bit if new opt-trans attribute is attached to non-local route */
- if ((s->src != NULL) && (a->type & EAF_ORIGINATED) &&
- (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
- a->flags |= BAF_PARTIAL;
-
- /* Call specific hook */
- CALL(desc->export, s, a);
+ /* Set partial bit if new opt-trans attribute is attached to non-local route */
+ if ((s->src != NULL) && (a->originated) &&
+ (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
+ a->flags |= BAF_PARTIAL;
- /* Attribute might become undefined in hook */
- if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
- return;
- }
- else
- {
- /* Don't re-export unknown non-transitive attributes */
- if (!(a->flags & BAF_TRANSITIVE))
- return;
+ /* Call specific hook */
+ CALL(desc->export, s, a);
- a->flags |= BAF_PARTIAL;
- }
+ /* Attribute might become undefined in hook */
+ if (a->undef)
+ return;
/* Append updated attribute */
to->attrs[to->count++] = *a;
@@ -1188,12 +1276,11 @@ bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
* Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
*/
static inline ea_list *
-bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
+bgp_export_attrs(struct bgp_export_state *s, ea_list *a)
{
/* Merge the attribute list */
- ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
- ea_merge(attrs, new);
- ea_sort(new);
+ ea_list *new = ea_normalize(a, 0);
+ ASSERT_DIE(new);
uint i, count;
count = new->count;
@@ -1203,7 +1290,7 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
for (i = 0; i < count; i++)
bgp_export_attr(s, &new->attrs[i], new);
- if (s->err_withdraw)
+ if (s->err_reject)
return NULL;
return new;
@@ -1217,14 +1304,9 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
static inline int
bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
{
- ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP);
-
- uint code = EA_ID(a->id);
-
- if (bgp_attr_known(code))
- return bgp_attr_table[code].encode(s, a, buf, size);
- else
- return bgp_encode_raw(s, a, buf, size);
+ const union bgp_attr_desc *desc = bgp_find_attr_desc(a);
+ ASSERT_DIE(desc);
+ return desc->encode(s, a, buf, size);
}
/**
@@ -1289,7 +1371,7 @@ bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
}
static inline void
-bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
+bgp_decode_attr(struct bgp_parse_state *s, byte code, byte flags, byte *data, uint len, ea_list **to)
{
/* Handle duplicate attributes; RFC 7606 3 (g) */
if (BIT32_TEST(s->attrs_seen, code))
@@ -1301,24 +1383,15 @@ bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, ui
}
BIT32_SET(s->attrs_seen, code);
- if (bgp_attr_known(code))
- {
- const struct bgp_attr_desc *desc = &bgp_attr_table[code];
+ ASSERT_DIE(bgp_attr_table[code].id);
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
- /* Handle conflicting flags; RFC 7606 3 (c) */
- if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
- !(desc->flags & BAF_DECODE_FLAGS))
- WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
+ /* Handle conflicting flags; RFC 7606 3 (c) */
+ if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
+ !(desc->flags & BAF_DECODE_FLAGS))
+ WITHDRAW("Malformed %s attribute - conflicting flags (%02x, expected %02x)", desc->name, flags, desc->flags);
- desc->decode(s, code, flags, data, len, to);
- }
- else /* Unknown attribute */
- {
- if (!(flags & BAF_OPTIONAL))
- WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
-
- bgp_decode_unknown(s, code, flags, data, len, to);
- }
+ desc->decode(s, code, flags, data, len, to);
}
/**
@@ -1336,7 +1409,8 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
{
struct bgp_proto *p = s->proto;
ea_list *attrs = NULL;
- uint code, flags, alen;
+ uint alen;
+ byte code, flags;
byte *pos = data;
/* Parse the attributes */
@@ -1401,23 +1475,23 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
/* Reject routes with our ASN in AS_PATH attribute */
if (bgp_as_path_loopy(p, attrs, p->local_as))
- goto withdraw;
+ goto loop;
/* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
- goto withdraw;
+ goto loop;
/* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
if (p->is_internal && bgp_originator_id_loopy(p, attrs))
- goto withdraw;
+ goto loop;
/* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
- goto withdraw;
+ goto loop;
/* If there is no local preference, define one */
if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
- bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
+ bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
return attrs;
@@ -1434,16 +1508,43 @@ withdraw:
s->err_withdraw = 1;
return NULL;
+
+loop:
+ /* Loops are handled as withdraws, but ignored silently. Do not set err_withdraw. */
+ return NULL;
}
void
-bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
+bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to)
{
/* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */
if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp)
{
REPORT("Discarding AIGP attribute received on non-AIGP session");
- bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP);
+ bgp_unset_attr(to, BA_AIGP);
+ }
+
+ /* Handle OTC ingress procedure, RFC 9234 */
+ if (bgp_channel_is_role_applicable(s->channel))
+ {
+ struct bgp_proto *p = s->proto;
+ eattr *e = bgp_find_attr(*to, BA_ONLY_TO_CUSTOMER);
+
+ /* Reject routes from downstream if they are leaked */
+ if (e && (p->cf->local_role == BGP_ROLE_PROVIDER ||
+ p->cf->local_role == BGP_ROLE_RS_SERVER))
+ WITHDRAW("Route leak detected - OTC attribute from downstream");
+
+ /* Reject routes from peers if they are leaked */
+ if (e && (p->cf->local_role == BGP_ROLE_PEER) && (e->u.data != p->cf->remote_as))
+ WITHDRAW("Route leak detected - OTC attribute with mismatched ASN (%u)",
+ (uint) e->u.data);
+
+ /* Mark routes from upstream if it did not happened before */
+ if (!e && (p->cf->local_role == BGP_ROLE_CUSTOMER ||
+ p->cf->local_role == BGP_ROLE_PEER ||
+ p->cf->local_role == BGP_ROLE_RS_CLIENT))
+ bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, 0, p->cf->remote_as);
}
}
@@ -1458,13 +1559,13 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
#define RBH_FN(a,h) h
#define RBH_REHASH bgp_rbh_rehash
-#define RBH_PARAMS /8, *2, 2, 2, 8, 20
+#define RBH_PARAMS /8, *2, 2, 2, 12, 20
HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
-void
-bgp_init_bucket_table(struct bgp_channel *c)
+static void
+bgp_init_bucket_table(struct bgp_pending_tx *c)
{
HASH_INIT(c->bucket_hash, c->pool, 8);
@@ -1472,24 +1573,8 @@ bgp_init_bucket_table(struct bgp_channel *c)
c->withdraw_bucket = NULL;
}
-void
-bgp_free_bucket_table(struct bgp_channel *c)
-{
- HASH_FREE(c->bucket_hash);
-
- struct bgp_bucket *b;
- WALK_LIST_FIRST(b, c->bucket_queue)
- {
- rem_node(&b->send_node);
- mb_free(b);
- }
-
- mb_free(c->withdraw_bucket);
- c->withdraw_bucket = NULL;
-}
-
static struct bgp_bucket *
-bgp_get_bucket(struct bgp_channel *c, ea_list *new)
+bgp_get_bucket(struct bgp_pending_tx *c, ea_list *new)
{
/* Hash and lookup */
u32 hash = ea_hash(new);
@@ -1498,55 +1583,27 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new)
if (b)
return b;
- uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
- uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
- uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
- uint i;
- byte *dest;
-
- /* Gather total size of non-inline attributes */
- for (i = 0; i < new->count; i++)
- {
- eattr *a = &new->attrs[i];
-
- if (!(a->type & EAF_EMBEDDED))
- size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
- }
+ /* Scan the list for total size */
+ uint ea_size = BIRD_CPU_ALIGN(ea_list_size(new));
+ uint size = sizeof(struct bgp_bucket) + ea_size;
- /* Create the bucket */
+ /* Allocate the bucket */
b = mb_alloc(c->pool, size);
*b = (struct bgp_bucket) { };
init_list(&b->prefixes);
b->hash = hash;
- /* Copy list of extended attributes */
- memcpy(b->eattrs, new, ea_size);
- dest = ((byte *) b->eattrs) + ea_size_aligned;
-
- /* Copy values of non-inline attributes */
- for (i = 0; i < new->count; i++)
- {
- eattr *a = &b->eattrs->attrs[i];
-
- if (!(a->type & EAF_EMBEDDED))
- {
- const struct adata *oa = a->u.ptr;
- struct adata *na = (struct adata *) dest;
- memcpy(na, oa, sizeof(struct adata) + oa->length);
- a->u.ptr = na;
- dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
- }
- }
+ /* Copy the ea_list */
+ ea_list_copy(b->eattrs, new, ea_size);
- /* Insert the bucket to send queue and bucket hash */
- add_tail(&c->bucket_queue, &b->send_node);
+ /* Insert the bucket to bucket hash */
HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
return b;
}
static struct bgp_bucket *
-bgp_get_withdraw_bucket(struct bgp_channel *c)
+bgp_get_withdraw_bucket(struct bgp_pending_tx *c)
{
if (!c->withdraw_bucket)
{
@@ -1557,25 +1614,45 @@ bgp_get_withdraw_bucket(struct bgp_channel *c)
return c->withdraw_bucket;
}
-void
-bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+static void
+bgp_free_bucket(struct bgp_pending_tx *c, struct bgp_bucket *b)
{
- rem_node(&b->send_node);
HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
mb_free(b);
}
+int
+bgp_done_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
+{
+ struct bgp_pending_tx *c = bc->ptx;
+
+ /* Won't free the withdraw bucket */
+ if (b == c->withdraw_bucket)
+ return 0;
+
+ if (EMPTY_LIST(b->prefixes))
+ rem_node(&b->send_node);
+
+ if (b->px_uc || !EMPTY_LIST(b->prefixes))
+ return 0;
+
+ bgp_free_bucket(c, b);
+ return 1;
+}
+
void
-bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_defer_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
+ struct bgp_pending_tx *c = bc->ptx;
rem_node(&b->send_node);
add_tail(&c->bucket_queue, &b->send_node);
}
void
-bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_withdraw_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
- struct bgp_proto *p = (void *) c->c.proto;
+ struct bgp_proto *p = (void *) bc->c.proto;
+ struct bgp_pending_tx *c = bc->ptx;
struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
log(L_ERR "%s: Attribute list too long", p->p.name);
@@ -1584,8 +1661,8 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
struct bgp_prefix *px = HEAD(b->prefixes);
log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
- rem_node(&px->buck_node);
- add_tail(&wb->prefixes, &px->buck_node);
+ rem_node(&px->buck_node_xx);
+ add_tail(&wb->prefixes, &px->buck_node_xx);
}
}
@@ -1596,42 +1673,43 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
#define PXH_KEY(px) px->net, px->path_id, px->hash
#define PXH_NEXT(px) px->next
-#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
+#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2)
#define PXH_FN(n,i,h) h
#define PXH_REHASH bgp_pxh_rehash
-#define PXH_PARAMS /8, *2, 2, 2, 8, 24
+#define PXH_PARAMS /8, *2, 2, 2, 12, 24
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
-void
-bgp_init_prefix_table(struct bgp_channel *c)
+static void
+bgp_init_prefix_table(struct bgp_channel *bc)
{
+ struct bgp_pending_tx *c = bc->ptx;
HASH_INIT(c->prefix_hash, c->pool, 8);
- uint alen = net_addr_length[c->c.net_type];
+ uint alen = net_addr_length[bc->c.net_type];
c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
}
-void
-bgp_free_prefix_table(struct bgp_channel *c)
-{
- HASH_FREE(c->prefix_hash);
-
- rfree(c->prefix_slab);
- c->prefix_slab = NULL;
-}
-
static struct bgp_prefix *
-bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id)
+bgp_get_prefix(struct bgp_pending_tx *c, const net_addr *net, struct rte_src *src, int add_path_tx)
{
- u32 hash = net_hash(net) ^ u32_hash(path_id);
- struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
+ u32 path_id = src->global_id;
+ u32 path_id_hash = add_path_tx ? path_id : 0;
+ /* We must use a different hash function than the rtable */
+ u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash));
+ struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash);
if (px)
{
- rem_node(&px->buck_node);
+ if (!add_path_tx && (path_id != px->path_id))
+ {
+ rt_unlock_source(rt_find_source_global(px->path_id));
+ rt_lock_source(src);
+ px->path_id = path_id;
+ }
+
return px;
}
@@ -1644,34 +1722,321 @@ bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id)
px->hash = hash;
px->path_id = path_id;
net_copy(px->net, net);
+ rt_lock_source(src);
HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
return px;
}
-void
-bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
+static void bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px);
+
+static inline int
+bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b)
+{
+#define IS_WITHDRAW_BUCKET(b) ((b) == c->ptx->withdraw_bucket)
+#define BPX_TRACE(what) do { \
+ if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \
+ c->c.proto->name, c->c.name, what, \
+ px->net, px->path_id, IS_WITHDRAW_BUCKET(b) ? "withdraw" : "update"); } while (0)
+ px->lastmod = current_time();
+
+ /* Already queued for the same bucket */
+ if (px->cur == b)
+ {
+ BPX_TRACE("already queued");
+ return 0;
+ }
+
+ /* Unqueue from the old bucket */
+ if (px->cur)
+ {
+ rem_node(&px->buck_node_xx);
+ bgp_done_bucket(c, px->cur);
+ }
+
+ /* The new bucket is the same as we sent before */
+ if ((px->last == b) || c->c.out_table && !px->last && IS_WITHDRAW_BUCKET(b))
+ {
+ if (px->cur)
+ BPX_TRACE("reverted");
+ else
+ BPX_TRACE("already sent");
+
+ /* Well, we haven't sent anything yet */
+ if (!px->last)
+ bgp_free_prefix(c->ptx, px);
+
+ px->cur = NULL;
+ return 0;
+ }
+
+ /* Enqueue the bucket if it has been empty */
+ if (!IS_WITHDRAW_BUCKET(b) && EMPTY_LIST(b->prefixes))
+ add_tail(&c->ptx->bucket_queue, &b->send_node);
+
+ /* Enqueue to the new bucket and indicate the change */
+ add_tail(&b->prefixes, &px->buck_node_xx);
+ px->cur = b;
+
+ BPX_TRACE("queued");
+ return 1;
+
+#undef BPX_TRACE
+}
+
+static void
+bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px)
{
- rem_node(&px->buck_node);
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
+ rt_unlock_source(rt_find_source_global(px->path_id));
+
if (c->prefix_slab)
- sl_free(c->prefix_slab, px);
+ sl_free(px);
else
mb_free(px);
}
+void
+bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck)
+{
+ /* Cleanup: We're called from bucket senders. */
+ ASSERT_DIE(px->cur == buck);
+ rem_node(&px->buck_node_xx);
+
+ /* We may want to store the updates */
+ if (c->c.out_table)
+ {
+ /* Nothing to be sent right now */
+ px->cur = NULL;
+
+ /* Unref the previous sent version */
+ if (px->last)
+ px->last->px_uc--;
+
+ /* Ref the current sent version */
+ if (!IS_WITHDRAW_BUCKET(buck))
+ {
+ px->last = buck;
+ px->last->px_uc++;
+ return;
+ }
+
+ /* Prefixes belonging to the withdraw bucket are freed always */
+ }
+
+ bgp_free_prefix(c->ptx, px);
+}
+
+static void
+bgp_pending_tx_rfree(resource *r)
+{
+ struct bgp_pending_tx *ptx = SKIP_BACK(struct bgp_pending_tx, r, r);
+
+ HASH_WALK(ptx->prefix_hash, next, n)
+ rt_unlock_source(rt_find_source_global(n->path_id));
+ HASH_WALK_END;
+}
+
+static void bgp_pending_tx_dump(resource *r UNUSED, unsigned indent UNUSED) { debug("\n"); }
+
+static struct resclass bgp_pending_tx_class = {
+ .name = "BGP Pending TX",
+ .size = sizeof(struct bgp_pending_tx),
+ .free = bgp_pending_tx_rfree,
+ .dump = bgp_pending_tx_dump,
+};
+
+void
+bgp_init_pending_tx(struct bgp_channel *c)
+{
+ ASSERT_DIE(!c->ptx);
+
+ pool *p = rp_new(c->pool, "BGP Pending TX");
+ c->ptx = ralloc(p, &bgp_pending_tx_class);
+ c->ptx->pool = p;
+
+ bgp_init_bucket_table(c->ptx);
+ bgp_init_prefix_table(c);
+}
+
+void
+bgp_free_pending_tx(struct bgp_channel *c)
+{
+ ASSERT_DIE(c->ptx);
+ ASSERT_DIE(c->ptx->pool);
+
+ rfree(c->ptx->pool);
+ c->ptx = NULL;
+}
+
+
+/*
+ * Prefix hash table exporter
+ */
+
+struct bgp_out_export_hook {
+ struct rt_export_hook h;
+ u32 hash_iter; /* Iterator over hash */
+};
+
+static void
+bgp_out_table_feed(void *data)
+{
+ struct bgp_out_export_hook *hook = data;
+ struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->h.table);
+ struct bgp_pending_tx *c = bc->ptx;
+
+ int max = 512;
+
+ const net_addr *neq = (hook->h.req->addr_mode == TE_ADDR_EQUAL) ? hook->h.req->addr : NULL;
+ const net_addr *cand = NULL;
+
+ do {
+ HASH_WALK_ITER(c->prefix_hash, PXH, n, hook->hash_iter)
+ {
+ switch (hook->h.req->addr_mode)
+ {
+ case TE_ADDR_IN:
+ if (!net_in_netX(n->net, hook->h.req->addr))
+ continue;
+ /* fall through */
+ case TE_ADDR_NONE:
+ /* Splitting only for multi-net exports */
+ if (--max <= 0)
+ HASH_WALK_ITER_PUT;
+ break;
+
+ case TE_ADDR_FOR:
+ if (!neq)
+ {
+ if (net_in_netX(hook->h.req->addr, n->net) && (!cand || (n->net->length > cand->length)))
+ cand = n->net;
+ continue;
+ }
+ /* fall through */
+ case TE_ADDR_EQUAL:
+ if (!net_equal(n->net, neq))
+ continue;
+ break;
+ }
+
+ struct bgp_bucket *buck = n->cur ?: n->last;
+ ea_list *ea = NULL;
+ if (buck == c->withdraw_bucket)
+ ea_set_dest(&ea, 0, RTD_UNREACHABLE);
+ else
+ {
+ ea = buck->eattrs;
+ eattr *eanh = bgp_find_attr(ea, BA_NEXT_HOP);
+ ASSERT_DIE(eanh);
+ const ip_addr *nh = (const void *) eanh->u.ptr->data;
+
+ struct nexthop_adata nhad = {
+ .ad = { .length = sizeof (struct nexthop_adata) - sizeof (struct adata), },
+ .nh = { .gw = nh[0], },
+ };
+
+ ea_set_attr(&ea, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nhad.ad)));
+ }
+
+ struct rte_storage es = {
+ .rte = {
+ .attrs = ea,
+ .net = n->net,
+ .src = rt_find_source_global(n->path_id),
+ .sender = NULL,
+ .lastmod = n->lastmod,
+ .flags = n->cur ? REF_PENDING : 0,
+ },
+ };
+
+ struct rt_pending_export rpe = {
+ .new = &es, .new_best = &es,
+ };
+
+ if (hook->h.req->export_bulk)
+ {
+ const rte *feed = &es.rte;
+ hook->h.req->export_bulk(hook->h.req, n->net, &rpe, &rpe, &feed, 1);
+ }
+ else if (hook->h.req->export_one)
+ hook->h.req->export_one(hook->h.req, n->net, &rpe);
+ else
+ bug("No export method in export request");
+ }
+ HASH_WALK_ITER_END;
+
+ neq = cand;
+ cand = NULL;
+ } while (neq);
+
+ if (hook->hash_iter)
+ ev_schedule_work(&hook->h.event);
+ else
+ rt_set_export_state(&hook->h, TES_READY);
+}
+
+static void
+bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req)
+{
+ req->hook = rt_alloc_export(re, sizeof(struct bgp_out_export_hook));
+ req->hook->req = req;
+
+ struct bgp_out_export_hook *hook = SKIP_BACK(struct bgp_out_export_hook, h, req->hook);
+
+ hook->h.event.hook = bgp_out_table_feed;
+ rt_init_export(re, req->hook);
+}
+
+static void
+bgp_out_table_export_done(void *data)
+{
+ struct bgp_out_export_hook *hook = data;
+ struct rt_export_request *req = hook->h.req;
+ void (*stopped)(struct rt_export_request *) = hook->h.stopped;
+
+ rt_export_stopped(&hook->h);
+ CALL(stopped, req);
+}
+
+static const struct rt_exporter_class bgp_out_table_export_class = {
+ .start = bgp_out_table_export_start,
+ .done = bgp_out_table_export_done,
+};
+
+void
+bgp_setup_out_table(struct bgp_channel *c)
+{
+ ASSERT_DIE(c->c.out_table == NULL);
+
+ c->prefix_exporter = (struct rt_exporter) {
+ .class = &bgp_out_table_export_class,
+ .addr_type = c->c.table->addr_type,
+ .rp = c->c.proto->pool,
+ };
+
+ rt_exporter_init(&c->prefix_exporter);
+
+ c->c.out_table = &c->prefix_exporter;
+}
+
/*
* BGP protocol glue
*/
int
-bgp_preexport(struct channel *c, rte *e)
+bgp_preexport(struct channel *C, rte *e)
{
- struct bgp_proto *p = (struct bgp_proto *) (c->proto);
+ struct bgp_proto *p = (struct bgp_proto *) C->proto;
struct bgp_proto *src = bgp_rte_proto(e);
+ struct bgp_channel *c = (struct bgp_channel *) C;
+
+ /* Ignore non-BGP channels */
+ if (C->channel != &channel_bgp)
+ return -1;
/* Reject our routes */
if (src == p)
@@ -1681,6 +2046,22 @@ bgp_preexport(struct channel *c, rte *e)
if (src == NULL)
return 0;
+ /* Reject flowspec that failed validation */
+ if (net_is_flow(e->net))
+ switch (rt_get_flowspec_valid(e))
+ {
+ case FLOWSPEC_VALID:
+ break;
+ case FLOWSPEC_INVALID:
+ return -1;
+ case FLOWSPEC_UNKNOWN:
+ ASSUME((rt_get_source_attr(e) != RTS_BGP) ||
+ !((struct bgp_channel *) SKIP_BACK(struct channel, in_req, e->sender->req))->base_table);
+ break;
+ case FLOWSPEC__MAX:
+ bug("This never happens.");
+ }
+
/* IBGP route reflection, RFC 4456 */
if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
{
@@ -1690,16 +2071,16 @@ bgp_preexport(struct channel *c, rte *e)
/* Generally, this should be handled when path is received, but we check it
also here as rr_cluster_id may be undefined or different in src. */
- if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
+ if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs))
return -1;
}
/* Handle well-known communities, RFC 1997 */
- struct eattr *com;
+ struct eattr *a;
if (p->cf->interpret_communities &&
- (com = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY))))
+ (a = bgp_find_attr(e->attrs, BA_COMMUNITY)))
{
- const struct adata *d = com->u.ptr;
+ const struct adata *d = a->u.ptr;
/* Do not export anywhere */
if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
@@ -1718,6 +2099,16 @@ bgp_preexport(struct channel *c, rte *e)
return -1;
}
+ /* Do not export routes marked with OTC to upstream, RFC 9234 */
+ if (bgp_channel_is_role_applicable(c))
+ {
+ a = bgp_find_attr(e->attrs, BA_ONLY_TO_CUSTOMER);
+ if (a && (p->cf->local_role==BGP_ROLE_CUSTOMER ||
+ p->cf->local_role==BGP_ROLE_PEER ||
+ p->cf->local_role==BGP_ROLE_RS_CLIENT))
+ return -1;
+ }
+
return 0;
}
@@ -1732,7 +2123,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
/* ORIGIN attribute - mandatory, attach if missing */
if (! bgp_find_attr(attrs0, BA_ORIGIN))
- bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
+ bgp_set_attr_u32(&attrs, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
/* AS_PATH attribute - mandatory */
a = bgp_find_attr(attrs0, BA_AS_PATH);
@@ -1747,24 +2138,24 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
{
/* IBGP or route server -> just ensure there is one */
if (!a)
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, &null_adata);
}
else if (p->is_interior)
{
/* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad);
}
else /* Regular EBGP (no RS, no confederation) */
{
/* Regular EBGP -> prepend ASN as regular sequence */
ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad);
/* MULTI_EXIT_DESC attribute - accept only if set in export filter */
a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
- if (a && !(a->type & EAF_FRESH))
- bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
+ if (a && !a->fresh && !p->cf->allow_med)
+ bgp_unset_attr(&attrs, BA_MULTI_EXIT_DISC);
}
/* NEXT_HOP attribute - delegated to AF-specific hook */
@@ -1773,16 +2164,16 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
/* LOCAL_PREF attribute - required for IBGP, attach if missing */
if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
- bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
+ bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
/* AIGP attribute - accumulate local metric or originate new one */
u64 metric;
if (s.local_next_hop &&
- (bgp_total_aigp_metric_(e->attrs, &metric, &ad) ||
+ (bgp_total_aigp_metric_(e, &metric, &ad) ||
(c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad))))
{
ad = bgp_aigp_set_metric(pool, ad, metric);
- bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_AIGP, 0, ad);
}
/* IBGP route reflection, RFC 4456 */
@@ -1790,7 +2181,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
{
/* ORIGINATOR_ID attribute - attach if not already set */
if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
- bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
+ bgp_set_attr_u32(&attrs, BA_ORIGINATOR_ID, 0, src->remote_id);
/* CLUSTER_LIST attribute - prepend cluster ID */
a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
@@ -1805,7 +2196,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
ad = int_set_prepend(pool, ad, p->rr_cluster_id);
/* Should be at least one prepended cluster ID */
- bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_CLUSTER_LIST, 0, ad);
}
/* AS4_* transition attributes, RFC 6793 4.2.2 */
@@ -1814,18 +2205,28 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
a = bgp_find_attr(attrs, BA_AS_PATH);
if (a && as_path_contains_as4(a->u.ptr))
{
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
- bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
+ bgp_set_attr_ptr(&attrs, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
}
a = bgp_find_attr(attrs, BA_AGGREGATOR);
if (a && aggregator_contains_as4(a->u.ptr))
{
- bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
- bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
+ bgp_set_attr_ptr(&attrs, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
+ bgp_set_attr_ptr(&attrs, BA_AS4_AGGREGATOR, 0, a->u.ptr);
}
}
+ /* Mark routes for downstream with OTC, RFC 9234 */
+ if (bgp_channel_is_role_applicable(c))
+ {
+ a = bgp_find_attr(attrs, BA_ONLY_TO_CUSTOMER);
+ if (!a && (p->cf->local_role == BGP_ROLE_PROVIDER ||
+ p->cf->local_role == BGP_ROLE_PEER ||
+ p->cf->local_role == BGP_ROLE_RS_SERVER))
+ bgp_set_attr_u32(&attrs, BA_ONLY_TO_CUSTOMER, 0, p->public_as);
+ }
+
/*
* Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
* conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
@@ -1842,34 +2243,39 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c
struct bgp_proto *p = (void *) P;
struct bgp_channel *c = (void *) C;
struct bgp_bucket *buck;
- struct bgp_prefix *px;
- u32 path;
+ struct rte_src *path;
+
+ /* Ignore non-BGP channels */
+ if (C->channel != &channel_bgp)
+ return;
if (new)
{
- struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, C->rte_update_pool);
+ struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs, tmp_linpool);
+
+ /* Error during attribute processing */
+ if (!attrs)
+ log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n);
/* If attributes are invalid, we fail back to withdraw */
- buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
- path = new->src->global_id;
+ buck = attrs ? bgp_get_bucket(c->ptx, attrs) : bgp_get_withdraw_bucket(c->ptx);
+ path = new->src;
}
else
{
- buck = bgp_get_withdraw_bucket(c);
- path = old->src->global_id;
+ buck = bgp_get_withdraw_bucket(c->ptx);
+ path = old->src;
}
- px = bgp_get_prefix(c, n, c->add_path_tx ? path : 0);
- add_tail(&buck->prefixes, &px->buck_node);
-
- bgp_schedule_packet(p->conn, c, PKT_UPDATE);
+ if (bgp_update_prefix(c, bgp_get_prefix(c->ptx, n, path, c->add_path_tx), buck))
+ bgp_schedule_packet(p->conn, c, PKT_UPDATE);
}
static inline u32
-bgp_get_neighbor(rte *r)
+bgp_get_neighbor(const rte *r)
{
- eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
+ eattr *e = ea_find(r->attrs, BGP_EA_ID(BA_AS_PATH));
u32 as;
if (e && as_path_get_first_regular(e->u.ptr, &as))
@@ -1881,30 +2287,14 @@ bgp_get_neighbor(rte *r)
}
static inline int
-rte_stale(rte *r)
+rte_stale(const rte *r)
{
- if (r->pflags & BGP_REF_STALE)
- return 1;
-
- if (r->pflags & BGP_REF_NOT_STALE)
- return 0;
-
- /* If staleness is unknown, compute and cache it */
- eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
- if (a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE))
- {
- r->pflags |= BGP_REF_STALE;
- return 1;
- }
- else
- {
- r->pflags |= BGP_REF_NOT_STALE;
- return 0;
- }
+ eattr *a = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY));
+ return a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
}
int
-bgp_rte_better(rte *new, rte *old)
+bgp_rte_better(const rte *new, const rte *old)
{
struct bgp_proto *new_bgp = bgp_rte_proto(new);
struct bgp_proto *old_bgp = bgp_rte_proto(old);
@@ -1920,8 +2310,8 @@ bgp_rte_better(rte *new, rte *old)
return 1;
/* RFC 4271 9.1.2.1. Route resolvability test */
- n = rta_resolvable(new->attrs);
- o = rta_resolvable(old->attrs);
+ n = rte_resolvable(new);
+ o = rte_resolvable(old);
if (n > o)
return 1;
if (n < o)
@@ -1936,8 +2326,8 @@ bgp_rte_better(rte *new, rte *old)
return 1;
/* Start with local preferences */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_LOCAL_PREF));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_LOCAL_PREF));
n = x ? x->u.data : new_bgp->cf->default_local_pref;
o = y ? y->u.data : old_bgp->cf->default_local_pref;
if (n > o)
@@ -1946,8 +2336,8 @@ bgp_rte_better(rte *new, rte *old)
return 0;
/* RFC 7311 4.1 - Apply AIGP metric */
- u64 n2 = bgp_total_aigp_metric(new->attrs);
- u64 o2 = bgp_total_aigp_metric(old->attrs);
+ u64 n2 = bgp_total_aigp_metric(new);
+ u64 o2 = bgp_total_aigp_metric(old);
if (n2 < o2)
return 1;
if (n2 > o2)
@@ -1956,8 +2346,8 @@ bgp_rte_better(rte *new, rte *old)
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
{
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_AS_PATH));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_AS_PATH));
n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
if (n < o)
@@ -1967,8 +2357,8 @@ bgp_rte_better(rte *new, rte *old)
}
/* RFC 4271 9.1.2.2. b) Use origins */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGIN));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGIN));
n = x ? x->u.data : ORIGIN_INCOMPLETE;
o = y ? y->u.data : ORIGIN_INCOMPLETE;
if (n < o)
@@ -1990,8 +2380,8 @@ bgp_rte_better(rte *new, rte *old)
if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
(bgp_get_neighbor(new) == bgp_get_neighbor(old)))
{
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
n = x ? x->u.data : new_bgp->cf->default_med;
o = y ? y->u.data : old_bgp->cf->default_med;
if (n < o)
@@ -2007,8 +2397,8 @@ bgp_rte_better(rte *new, rte *old)
return 1;
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
- n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
- o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
+ n = new_bgp->cf->igp_metric ? rt_get_igp_metric(new) : 0;
+ o = old_bgp->cf->igp_metric ? rt_get_igp_metric(old) : 0;
if (n < o)
return 1;
if (n > o)
@@ -2016,8 +2406,8 @@ bgp_rte_better(rte *new, rte *old)
/* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
/* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGINATOR_ID));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGINATOR_ID));
n = x ? x->u.data : new_bgp->remote_id;
o = y ? y->u.data : old_bgp->remote_id;
@@ -2034,8 +2424,8 @@ bgp_rte_better(rte *new, rte *old)
return 0;
/* RFC 4456 9. b) Compare cluster list lengths */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_CLUSTER_LIST));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_CLUSTER_LIST));
n = x ? int_set_get_size(x->u.ptr) : 0;
o = y ? int_set_get_size(y->u.ptr) : 0;
if (n < o)
@@ -2049,7 +2439,7 @@ bgp_rte_better(rte *new, rte *old)
int
-bgp_rte_mergable(rte *pri, rte *sec)
+bgp_rte_mergable(const rte *pri, const rte *sec)
{
struct bgp_proto *pri_bgp = bgp_rte_proto(pri);
struct bgp_proto *sec_bgp = bgp_rte_proto(sec);
@@ -2057,17 +2447,20 @@ bgp_rte_mergable(rte *pri, rte *sec)
u32 p, s;
/* Skip suppressed routes (see bgp_rte_recalculate()) */
- /* LLGR draft - depreference stale routes */
- if (pri->pflags != sec->pflags)
+ if ((pri->pflags ^ sec->pflags) & BGP_REF_SUPPRESSED)
return 0;
/* RFC 4271 9.1.2.1. Route resolvability test */
- if (rta_resolvable(pri->attrs) != rta_resolvable(sec->attrs))
+ if (rte_resolvable(pri) != rte_resolvable(sec))
+ return 0;
+
+ /* LLGR draft - depreference stale routes */
+ if (rte_stale(pri) != rte_stale(sec))
return 0;
/* Start with local preferences */
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_LOCAL_PREF));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_LOCAL_PREF));
p = x ? x->u.data : pri_bgp->cf->default_local_pref;
s = y ? y->u.data : sec_bgp->cf->default_local_pref;
if (p != s)
@@ -2076,8 +2469,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
{
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_AS_PATH));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_AS_PATH));
p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
@@ -2089,8 +2482,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
}
/* RFC 4271 9.1.2.2. b) Use origins */
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_ORIGIN));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_ORIGIN));
p = x ? x->u.data : ORIGIN_INCOMPLETE;
s = y ? y->u.data : ORIGIN_INCOMPLETE;
if (p != s)
@@ -2100,8 +2493,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
(bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
{
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
p = x ? x->u.data : pri_bgp->cf->default_med;
s = y ? y->u.data : sec_bgp->cf->default_med;
if (p != s)
@@ -2113,8 +2506,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
return 0;
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
- p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
- s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
+ p = pri_bgp->cf->igp_metric ? rt_get_igp_metric(pri) : 0;
+ s = sec_bgp->cf->igp_metric ? rt_get_igp_metric(sec) : 0;
if (p != s)
return 0;
@@ -2127,7 +2520,7 @@ bgp_rte_mergable(rte *pri, rte *sec)
static inline int
same_group(rte *r, u32 lpref, u32 lasn)
{
- return (r->attrs->pref == lpref) && (bgp_get_neighbor(r) == lasn);
+ return (rt_get_preference(r) == lpref) && (bgp_get_neighbor(r) == lasn);
}
static inline int
@@ -2138,10 +2531,10 @@ use_deterministic_med(struct rte_storage *r)
}
int
-bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best)
+bgp_rte_recalculate(struct rtable_private *table, net *net, rte *new, rte *old, rte *old_best)
{
rte *key = new ? new : old;
- u32 lpref = key->attrs->pref;
+ u32 lpref = rt_get_preference(key);
u32 lasn = bgp_get_neighbor(key);
int old_suppressed = old ? !!(old->pflags & BGP_REF_SUPPRESSED) : 0;
@@ -2208,7 +2601,7 @@ bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *ol
/* The default case - find a new best-in-group route */
rte *r = new; /* new may not be in the list */
- for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next)
+ for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next)
if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
{
s->rte.pflags |= BGP_REF_SUPPRESSED;
@@ -2225,7 +2618,7 @@ bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *ol
new->pflags &= ~BGP_REF_SUPPRESSED;
/* Found all existing routes mergable with best-in-group */
- for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next)
+ for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next)
if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
if ((&s->rte != r) && bgp_rte_mergable(r, &s->rte))
s->rte.pflags &= ~BGP_REF_SUPPRESSED;
@@ -2264,43 +2657,58 @@ bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *ol
}
void
-bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count)
+bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n,
+ struct rt_pending_export *first, struct rt_pending_export *last,
+ const rte **feed, uint count)
{
struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req);
+ struct rt_import_hook *irh = c->c.in_req.hook;
- do {
- rte *r = feed[--count];
- if (r->sender != c->c.in_req.hook)
- continue;
+ /* Find our routes among others */
+ for (uint i=0; i<count; i++)
+ {
+ const rte *r = feed[i];
- /* A new route, do not mark as stale */
- if (r->stale_cycle == c->c.in_req.hook->stale_set)
+ if (
+ !rte_is_valid(r) || /* Not a valid route */
+ (r->sender != irh) || /* Not our route */
+ (r->stale_cycle == irh->stale_set)) /* A new route, do not mark as stale */
continue;
- eattr *ea = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
+ eattr *ea = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY));
const struct adata *ad = ea ? ea->u.ptr : NULL;
uint flags = ea ? ea->flags : BAF_PARTIAL;
- rte e0 = *r;
- e0.flags |= REF_USE_STALE;
-
+ /* LLGR not allowed, withdraw the route */
if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
+ {
rte_import(&c->c.in_req, n, NULL, r->src);
+ continue;
+ }
- else if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
- rte_import(&c->c.in_req, n, &e0, r->src);
+ /* Route already marked as LLGR, do nothing */
+ if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
+ continue;
- else {
- rta *a = e0.attrs = rta_do_cow(r->attrs, c->c.rte_update_pool);
+ /* Store the tmp_linpool state to aggresively save memory */
+ struct lp_state tmpp;
+ lp_save(tmp_linpool, &tmpp);
- bgp_set_attr_ptr(&(a->eattrs), c->c.rte_update_pool, BA_COMMUNITY, flags,
- int_set_add(c->c.rte_update_pool, ad, BGP_COMM_LLGR_STALE));
- e0.pflags |= BGP_REF_STALE;
+ /* Mark the route as LLGR */
+ rte e0 = *r;
+ bgp_set_attr_ptr(&e0.attrs, BA_COMMUNITY, flags, int_set_add(tmp_linpool, ad, BGP_COMM_LLGR_STALE));
- rte_import(&c->c.in_req, n, &e0, r->src);
- lp_flush(c->c.rte_update_pool);
- }
- } while (count);
+ /* We need to update the route but keep it stale. */
+ ASSERT_DIE(irh->stale_set == irh->stale_valid + 1);
+ irh->stale_set--;
+ rte_import(&c->c.in_req, n, &e0, r->src);
+ irh->stale_set++;
+
+ /* Restore the memory state */
+ lp_restore(tmp_linpool, &tmpp);
+ }
+
+ rpe_mark_seen_all(req->hook, first, last, NULL);
}
@@ -2316,8 +2724,8 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
/* First, unset AS4_* attributes */
- if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
- if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
+ if (p4) bgp_unset_attr(attrs, BA_AS4_PATH);
+ if (a4) bgp_unset_attr(attrs, BA_AS4_AGGREGATOR);
/* Handle AGGREGATOR attribute */
if (a2 && a4)
@@ -2350,60 +2758,37 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
}
}
-int
-bgp_get_attr(const eattr *a, byte *buf, int buflen)
-{
- uint i = EA_ID(a->id);
- const struct bgp_attr_desc *d;
- int len;
-
- if (bgp_attr_known(i))
- {
- d = &bgp_attr_table[i];
- len = bsprintf(buf, "%s", d->name);
- buf += len;
- if (d->format)
- {
- *buf++ = ':';
- *buf++ = ' ';
- d->format(a, buf, buflen - len - 2);
- return GA_FULL;
- }
- return GA_NAME;
- }
-
- bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
- return GA_NAME;
-}
-
void
-bgp_get_route_info(rte *e, byte *buf)
+bgp_get_route_info(const rte *e, byte *buf)
{
- eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
- eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
+ eattr *p = ea_find(e->attrs, BGP_EA_ID(BA_AS_PATH));
+ eattr *o = ea_find(e->attrs, BGP_EA_ID(BA_ORIGIN));
u32 origas;
- buf += bsprintf(buf, " (%d", e->attrs->pref);
+ buf += bsprintf(buf, " (%d", rt_get_preference(e));
- if (e->pflags & BGP_REF_SUPPRESSED)
- buf += bsprintf(buf, "-");
+ if (!net_is_flow(e->net))
+ {
+ if (e->pflags & BGP_REF_SUPPRESSED)
+ buf += bsprintf(buf, "-");
- if (rte_stale(e))
- buf += bsprintf(buf, "s");
+ if (rte_stale(e))
+ buf += bsprintf(buf, "s");
- u64 metric = bgp_total_aigp_metric(e->attrs);
- if (metric < BGP_AIGP_MAX)
- {
- buf += bsprintf(buf, "/%lu", metric);
- }
- else if (e->attrs->igp_metric)
- {
- if (!rta_resolvable(e->attrs))
- buf += bsprintf(buf, "/-");
- else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
- buf += bsprintf(buf, "/?");
- else
- buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
+ u64 metric = bgp_total_aigp_metric(e);
+ if (metric < BGP_AIGP_MAX)
+ {
+ buf += bsprintf(buf, "/%lu", metric);
+ }
+ else if (metric = rt_get_igp_metric(e))
+ {
+ if (!rte_resolvable(e))
+ buf += bsprintf(buf, "/-");
+ else if (metric >= IGP_METRIC_UNKNOWN)
+ buf += bsprintf(buf, "/?");
+ else
+ buf += bsprintf(buf, "/%d", metric);
+ }
}
buf += bsprintf(buf, ") [");
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index e2754649..cda0eb8d 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -101,7 +101,9 @@
* RFC 8203 - BGP Administrative Shutdown Communication
* RFC 8212 - Default EBGP Route Propagation Behavior without Policies
* RFC 8654 - Extended Message Support for BGP
- * draft-ietf-idr-ext-opt-param-07
+ * RFC 9072 - Extended Optional Parameters Length for BGP OPEN Message
+ * RFC 9117 - Revised Validation Procedure for BGP Flow Specifications
+ * RFC 9234 - Route Leak Prevention and Detection Using Roles
* draft-uttaro-idr-bgp-persistence-04
* draft-walton-bgp-hostname-capability-02
*/
@@ -113,7 +115,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "nest/locks.h"
#include "conf/conf.h"
@@ -124,8 +126,13 @@
#include "bgp.h"
-/* Global list of listening sockets */
-static list STATIC_LIST_INIT(bgp_sockets);
+static void bgp_listen_create(void *);
+
+static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */
+static list STATIC_LIST_INIT(bgp_listen_pending); /* Global list of listening socket open requests */
+static event bgp_listen_event = { .hook = bgp_listen_create };
+
+DOMAIN(rtable) bgp_listen_domain;
static void bgp_connect(struct bgp_proto *p);
static void bgp_active(struct bgp_proto *p);
@@ -136,14 +143,69 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd);
static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
static void bgp_listen_sock_err(sock *sk UNUSED, int err);
+static void bgp_initiate_disable(struct bgp_proto *p, int err_val);
static void bgp_graceful_restart_feed(struct bgp_channel *c);
-static inline void channel_refresh_end_reload(struct channel *c)
+
+
+static inline int
+bgp_setup_auth(struct bgp_proto *p, int enable)
{
- channel_refresh_end(c);
+ /* Beware. This is done from main_birdloop and protocol birdloop is NOT ENTERED.
+ * Anyway, we are only accessing:
+ * - protocol config which can be changed only from main_birdloop (reconfig)
+ * - protocol listen socket which is always driven by main_birdloop
+ * - protocol name which is set on reconfig
+ */
+
+ if (p->cf->password && p->listen.sock)
+ {
+ ip_addr prefix = p->cf->remote_ip;
+ int pxlen = -1;
+
+ if (p->cf->remote_range)
+ {
+ prefix = net_prefix(p->cf->remote_range);
+ pxlen = net_pxlen(p->cf->remote_range);
+ }
+
+ int rv = sk_set_md5_auth(p->listen.sock->sk,
+ p->cf->local_ip, prefix, pxlen, p->cf->iface,
+ enable ? p->cf->password : NULL, p->cf->setkey);
+
+ if (rv < 0)
+ sk_log_error(p->listen.sock->sk, p->p.name);
- if (c->in_table)
- channel_request_reload(c);
+ return rv;
+ }
+ else
+ return 0;
+}
+
+/**
+ * bgp_close - close a BGP instance
+ * @p: BGP instance
+ *
+ * This function frees and deconfigures shared BGP resources.
+ */
+static void
+bgp_close(struct bgp_proto *p)
+{
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
+
+ struct bgp_listen_request *req = &p->listen;
+ struct bgp_socket *bs = req->sock;
+
+ if (bs)
+ {
+ req->sock = NULL;
+ rem_node(&req->n);
+
+ if (bs && EMPTY_LIST(bs->requests))
+ ev_send(&global_event_list, &bgp_listen_event);
+ }
+
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
/**
@@ -155,121 +217,136 @@ static inline void channel_refresh_end_reload(struct channel *c)
* is acquired and neighbor is ready). When error, caller should change state to
* PS_DOWN and return immediately.
*/
-static int
+static void
bgp_open(struct bgp_proto *p)
{
- ASSERT_DIE(birdloop_inside(&main_birdloop));
-
- struct bgp_socket *bs = NULL;
- struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
- ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
- (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
- uint port = p->cf->local_port;
- uint flags = p->cf->free_bind ? SKF_FREEBIND : 0;
- uint flag_mask = SKF_FREEBIND;
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
+ struct bgp_listen_request *req = &p->listen;
/* We assume that cf->iface is defined iff cf->local_ip is link-local */
+ req->iface = p->cf->strict_bind ? p->cf->iface : NULL;
+ req->vrf = p->p.vrf;
+ req->addr = p->cf->strict_bind ? p->cf->local_ip :
+ (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
+ req->port = p->cf->local_port;
+ req->flags = p->cf->free_bind ? SKF_FREEBIND : 0;
- WALK_LIST(bs, bgp_sockets)
- if (ipa_equal(bs->sk->saddr, addr) &&
- (bs->sk->sport == port) &&
- (bs->sk->iface == ifa) &&
- (bs->sk->vrf == p->p.vrf) &&
- ((bs->sk->flags & flag_mask) == flags))
- {
- bs->uc++;
- p->sock = bs;
- return 0;
- }
-
- sock *sk = sk_new(proto_pool);
- sk->type = SK_TCP_PASSIVE;
- sk->ttl = 255;
- sk->saddr = addr;
- sk->sport = port;
- sk->iface = ifa;
- sk->vrf = p->p.vrf;
- sk->flags = flags | SKF_PASSIVE_THREAD;
- sk->tos = IP_PREC_INTERNET_CONTROL;
- sk->rbsize = BGP_RX_BUFFER_SIZE;
- sk->tbsize = BGP_TX_BUFFER_SIZE;
- sk->rx_hook = bgp_incoming_connection;
- sk->err_hook = bgp_listen_sock_err;
-
- if (sk_open(sk) < 0)
- goto err;
-
- bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
- bs->sk = sk;
- bs->uc = 1;
- p->sock = bs;
- sk->data = bs;
+ BGP_TRACE(D_EVENTS, "Requesting listen socket at %I%J port %u", req->addr, req->iface, req->port);
- add_tail(&bgp_sockets, &bs->n);
+ add_tail(&bgp_listen_pending, &req->n);
+ ev_send(&global_event_list, &bgp_listen_event);
- return 0;
-
-err:
- sk_log_error(sk, p->p.name);
- log(L_ERR "%s: Cannot open listening socket", p->p.name);
- rfree(sk);
- return -1;
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
-/**
- * bgp_close - close a BGP instance
- * @p: BGP instance
- *
- * This function frees and deconfigures shared BGP resources.
- */
static void
-bgp_close(struct bgp_proto *p)
+bgp_listen_create(void *_ UNUSED)
{
ASSERT_DIE(birdloop_inside(&main_birdloop));
- struct bgp_socket *bs = p->sock;
-
- ASSERT(bs && bs->uc);
+ uint flag_mask = SKF_FREEBIND;
- if (--bs->uc)
- return;
+ while (1) {
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
- rfree(bs->sk);
- rem_node(&bs->n);
- mb_free(bs);
-}
+ if (EMPTY_LIST(bgp_listen_pending))
+ {
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
+ break;
+ }
-static inline int
-bgp_setup_auth(struct bgp_proto *p, int enable)
-{
- if (p->cf->password)
- {
- ip_addr prefix = p->cf->remote_ip;
- int pxlen = -1;
+ /* Get the first request to match */
+ struct bgp_listen_request *req = HEAD(bgp_listen_pending);
+ struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req);
+ rem_node(&req->n);
+
+ /* First try to find existing socket */
+ struct bgp_socket *bs;
+ WALK_LIST(bs, bgp_sockets)
+ if (ipa_equal(bs->sk->saddr, req->addr) &&
+ (bs->sk->sport == req->port) &&
+ (bs->sk->iface == req->iface) &&
+ (bs->sk->vrf == req->vrf) &&
+ ((bs->sk->flags & flag_mask) == req->flags))
+ break;
- if (p->cf->remote_range)
+ /* Not found any */
+ if (NODE_VALID(bs))
+ BGP_TRACE(D_EVENTS, "Found a listening socket: %p", bs);
+ else
{
- prefix = net_prefix(p->cf->remote_range);
- pxlen = net_pxlen(p->cf->remote_range);
+ /* Allocating new socket from global protocol pool.
+ * We can do this in main_birdloop. */
+ sock *sk = sk_new(proto_pool);
+ sk->type = SK_TCP_PASSIVE;
+ sk->ttl = 255;
+ sk->saddr = req->addr;
+ sk->sport = req->port;
+ sk->iface = req->iface;
+ sk->vrf = req->vrf;
+ sk->flags = req->flags;
+ sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->rbsize = BGP_RX_BUFFER_SIZE;
+ sk->tbsize = BGP_TX_BUFFER_SIZE;
+ sk->rx_hook = bgp_incoming_connection;
+ sk->err_hook = bgp_listen_sock_err;
+
+ if (sk_open(sk, &main_birdloop) < 0)
+ {
+ sk_log_error(sk, p->p.name);
+ log(L_ERR "%s: Cannot open listening socket", p->p.name);
+ rfree(sk);
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
+
+ bgp_initiate_disable(p, BEM_NO_SOCKET);
+ continue;
+ }
+
+ bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
+ bs->sk = sk;
+ sk->data = bs;
+
+ init_list(&bs->requests);
+ add_tail(&bgp_sockets, &bs->n);
+
+ BGP_TRACE(D_EVENTS, "Created new listening socket: %p", bs);
}
- int rv = sk_set_md5_auth(p->sock->sk,
- p->cf->local_ip, prefix, pxlen, p->cf->iface,
- enable ? p->cf->password : NULL, p->cf->setkey);
+ req->sock = bs;
+ add_tail(&bs->requests, &req->n);
- if (rv < 0)
- sk_log_error(p->sock->sk, p->p.name);
+ if (bgp_setup_auth(p, 1) < 0)
+ {
+ rem_node(&req->n);
+ req->sock = NULL;
- return rv;
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
+
+ bgp_initiate_disable(p, BEM_INVALID_MD5);
+ continue;
+ }
+
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
- else
- return 0;
+
+ /* Cleanup leftover listening sockets */
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
+ struct bgp_socket *bs;
+ node *nxt;
+ WALK_LIST_DELSAFE(bs, nxt, bgp_sockets)
+ if (EMPTY_LIST(bs->requests))
+ {
+ rfree(bs->sk);
+ rem_node(&bs->n);
+ mb_free(bs);
+ }
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
static inline struct bgp_channel *
bgp_find_channel(struct bgp_proto *p, u32 afi)
{
struct bgp_channel *c;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
if (c->afi == afi)
return c;
@@ -288,6 +365,8 @@ bgp_startup(struct bgp_proto *p)
if (p->postponed_sk)
{
/* Apply postponed incoming connection */
+ sk_reloop(p->postponed_sk, p->p.loop);
+
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
bgp_send_open(&p->incoming_conn);
@@ -305,13 +384,7 @@ bgp_startup_timeout(timer *t)
static void
bgp_initiate(struct bgp_proto *p)
{
- int err_val;
-
- if (bgp_open(p) < 0)
- { err_val = BEM_NO_SOCKET; goto err1; }
-
- if (bgp_setup_auth(p, 1) < 0)
- { err_val = BEM_INVALID_MD5; goto err2; }
+ bgp_open(p);
if (p->cf->bfd)
bgp_update_bfd(p, p->cf->bfd);
@@ -324,24 +397,28 @@ bgp_initiate(struct bgp_proto *p)
}
else
bgp_startup(p);
+}
- return;
-
-err2:
- bgp_close(p);
-err1:
- p->p.disabled = 1;
- bgp_store_error(p, NULL, BE_MISC, err_val);
-
- p->neigh = NULL;
- proto_notify_state(&p->p, PS_DOWN);
-
- return;
+static void
+bgp_initiate_disable(struct bgp_proto *p, int err_val)
+{
+ PROTO_LOCKED_FROM_MAIN(&p->p)
+ {
+ /* The protocol may be already down for another reason.
+ * Shutdown the protocol only if it isn't already shutting down. */
+ switch (p->p.proto_state)
+ {
+ case PS_START:
+ case PS_UP:
+ p->p.disabled = 1;
+ bgp_store_error(p, NULL, BE_MISC, err_val);
+ bgp_stop(p, err_val, NULL, 0);
+ }
+ }
}
/**
* bgp_start_timer - start a BGP timer
- * @p: bgp_proto which the timer belongs to
* @t: timer
* @value: time (in seconds) to fire (0 to disable the timer)
*
@@ -352,8 +429,6 @@ err1:
void
bgp_start_timer(struct bgp_proto *p, timer *t, uint value)
{
- BGP_ASSERT_INSIDE(p);
-
if (value)
{
/* The randomization procedure is specified in RFC 4271 section 10 */
@@ -375,7 +450,7 @@ bgp_start_timer(struct bgp_proto *p, timer *t, uint value)
void
bgp_close_conn(struct bgp_conn *conn)
{
- BGP_ASSERT_INSIDE(conn->bgp);
+ // struct bgp_proto *p = conn->bgp;
DBG("BGP: Closing connection\n");
conn->packets_to_send = 0;
@@ -386,8 +461,10 @@ bgp_close_conn(struct bgp_conn *conn)
conn->keepalive_timer = NULL;
rfree(conn->hold_timer);
conn->hold_timer = NULL;
+
rfree(conn->tx_ev);
conn->tx_ev = NULL;
+
rfree(conn->sk);
conn->sk = NULL;
@@ -467,8 +544,6 @@ bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len
static void
bgp_down(struct bgp_proto *p)
{
- bgp_start_timer(p, p->startup_timer, 0);
-
if (p->start_state > BSS_PREPARE)
{
bgp_setup_auth(p, 0);
@@ -482,34 +557,21 @@ bgp_down(struct bgp_proto *p)
}
static void
-bgp_active_event(void *vp)
+bgp_decision(void *vp)
{
struct bgp_proto *p = vp;
- BGP_ASSERT_INSIDE(p);
-
- DBG("%s: Decision start\n", p->p.name);
+ DBG("BGP: Decision start\n");
if ((p->p.proto_state == PS_START) &&
(p->outgoing_conn.state == BS_IDLE) &&
(p->incoming_conn.state != BS_OPENCONFIRM) &&
!p->passive)
bgp_active(p);
-}
-
-static void
-bgp_down_event(void *vp)
-{
- struct bgp_proto *p = vp;
-
- BGP_ENTER(p);
- DBG("%s: Down event\n", p->p.name);
if ((p->p.proto_state == PS_STOP) &&
(p->outgoing_conn.state == BS_IDLE) &&
(p->incoming_conn.state == BS_IDLE))
bgp_down(p);
-
- BGP_LEAVE(p);
}
static struct bgp_proto *
@@ -539,9 +601,16 @@ void
bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
{
proto_notify_state(&p->p, PS_STOP);
+ p->uncork_ev->data = NULL;
bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
- ev_send_loop(&main_birdloop, p->down_event);
+
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ if (c->ptx)
+ bgp_free_pending_tx(c);
+
+ proto_send_event(&p->p, p->event);
}
static inline void
@@ -588,7 +657,6 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
p->link_addr = p->neigh->iface->llv6->ip;
conn->sk->fast_rx = 0;
- conn->sk->cork = &rt_cork;
p->conn = conn;
p->last_error_class = 0;
@@ -614,7 +682,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
/* Summary state of ADD_PATH RX for active channels */
uint summary_add_path_rx = 0;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
@@ -635,7 +703,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
int active = loc->ready && rem->ready;
c->c.disabled = !active;
- c->c.reloadable = p->route_refresh || c->cf->import_table;
+ c->c.reloadable = p->route_refresh || ((c->c.in_keep & RIK_PREFILTER) == RIK_PREFILTER);
c->index = active ? num++ : 0;
@@ -696,7 +764,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
p->channel_count = num;
p->summary_add_path_rx = summary_add_path_rx;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
if (c->c.disabled)
continue;
@@ -747,8 +815,7 @@ bgp_conn_enter_idle_state(struct bgp_conn *conn)
bgp_close_conn(conn);
bgp_conn_set_state(conn, BS_IDLE);
- ev_send_loop(p->p.loop, p->active_event);
- ev_send_loop(&main_birdloop, p->down_event);
+ proto_send_event(&p->p, p->event);
if (os == BS_ESTABLISHED)
bgp_conn_leave_established_state(p);
@@ -776,7 +843,7 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
p->gr_active_num = 0;
struct bgp_channel *c;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
/* FIXME: perhaps check for channel state instead of disabled flag? */
if (c->c.disabled)
@@ -790,16 +857,14 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
{
case BGP_GRS_NONE:
c->gr_active = BGP_GRS_ACTIVE;
- channel_refresh_begin(&c->c);
- break;
+ /* fall through */
case BGP_GRS_ACTIVE:
- channel_refresh_end(&c->c);
- channel_refresh_begin(&c->c);
+ rt_refresh_begin(&c->c.in_req);
break;
case BGP_GRS_LLGR:
- channel_refresh_begin(&c->c);
+ rt_refresh_begin(&c->c.in_req);
bgp_graceful_restart_feed(c);
break;
}
@@ -807,15 +872,13 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
else
{
/* Just flush the routes */
- channel_refresh_begin(&c->c);
- channel_refresh_end(&c->c);
+ rt_refresh_begin(&c->c.in_req);
+ rt_refresh_end(&c->c.in_req);
}
/* Reset bucket and prefix tables */
- bgp_free_bucket_table(c);
- bgp_free_prefix_table(c);
- bgp_init_bucket_table(c);
- bgp_init_prefix_table(c);
+ bgp_free_pending_tx(c);
+ bgp_init_pending_tx(c);
c->packets_to_send = 0;
}
@@ -871,6 +934,8 @@ bgp_graceful_restart_feed(struct bgp_channel *c)
}
+
+
/**
* bgp_graceful_restart_done - finish active BGP graceful restart
* @c: BGP channel
@@ -893,11 +958,8 @@ bgp_graceful_restart_done(struct bgp_channel *c)
if (!p->gr_active_num)
BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
- if (c->stale_feed.hook)
- rt_stop_export(&c->stale_feed, bgp_graceful_restart_feed_done);
-
tm_stop(c->stale_timer);
- channel_refresh_end_reload(&c->c);
+ rt_refresh_end(&c->c.in_req);
}
/**
@@ -919,7 +981,7 @@ bgp_graceful_restart_timeout(timer *t)
if (p->llgr_ready)
{
struct bgp_channel *c;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
/* Channel is not in GR and is already flushed */
if (!c->gr_active)
@@ -976,11 +1038,8 @@ bgp_refresh_begin(struct bgp_channel *c)
if (c->load_state == BFS_LOADING)
{ log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
- if (c->load_state == BFS_REFRESHING)
- channel_refresh_end(&c->c);
-
c->load_state = BFS_REFRESHING;
- channel_refresh_begin(&c->c);
+ rt_refresh_begin(&c->c.in_req);
}
/**
@@ -1001,7 +1060,7 @@ bgp_refresh_end(struct bgp_channel *c)
{ log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
c->load_state = BFS_NONE;
- channel_refresh_end_reload(&c->c);
+ rt_refresh_end(&c->c.in_req);
}
@@ -1137,12 +1196,10 @@ bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
static void
bgp_setup_sk(struct bgp_conn *conn, sock *s)
{
- ASSERT_DIE(s->flags & SKF_THREAD);
s->data = conn;
s->err_hook = bgp_sock_err;
s->fast_rx = 1;
conn->sk = s;
- sk_start(s);
}
static void
@@ -1151,8 +1208,6 @@ bgp_active(struct bgp_proto *p)
int delay = MAX(1, p->cf->connect_delay_time);
struct bgp_conn *conn = &p->outgoing_conn;
- BGP_ASSERT_INSIDE(p);
-
BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
bgp_setup_conn(p, conn);
bgp_conn_set_state(conn, BS_ACTIVE);
@@ -1173,12 +1228,9 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
struct bgp_conn *conn = &p->outgoing_conn;
int hops = p->cf->multihop ? : 1;
- BGP_ASSERT_INSIDE(p);
-
DBG("BGP: Connecting\n");
sock *s = sk_new(p->p.pool);
s->type = SK_TCP_ACTIVE;
- s->flags |= SKF_THREAD;
s->saddr = p->local_ip;
s->daddr = p->remote_ip;
s->dport = p->cf->remote_port;
@@ -1190,6 +1242,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
s->tos = IP_PREC_INTERNET_CONTROL;
s->password = p->cf->password;
s->tx_hook = bgp_connected;
+ s->flags = p->cf->free_bind ? SKF_FREEBIND : 0;
BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J",
s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL,
s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
@@ -1197,7 +1250,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
bgp_setup_sk(conn, s);
bgp_conn_set_state(conn, BS_CONNECT);
- if (sk_open(s) < 0)
+ if (sk_open(s, p->p.loop) < 0)
goto err;
/* Set minimal receive TTL if needed */
@@ -1227,12 +1280,17 @@ static struct bgp_proto *
bgp_find_proto(sock *sk)
{
struct bgp_proto *best = NULL;
- struct bgp_proto *p;
+ struct bgp_socket *bs = sk->data;
+ struct bgp_listen_request *req;
/* sk->iface is valid only if src or dst address is link-local */
int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr);
- WALK_LIST(p, proto_list)
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
+
+ WALK_LIST(req, bs->requests)
+ {
+ struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req);
if ((p->p.proto == &proto_bgp) &&
(ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) &&
(!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) &&
@@ -1246,7 +1304,9 @@ bgp_find_proto(sock *sk)
if (!bgp_is_dynamic(p))
break;
}
+ }
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
return best;
}
@@ -1265,6 +1325,8 @@ bgp_find_proto(sock *sk)
static int
bgp_incoming_connection(sock *sk, uint dummy UNUSED)
{
+ ASSERT_DIE(birdloop_inside(&main_birdloop));
+
struct bgp_proto *p;
int acc, hops;
@@ -1278,17 +1340,7 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
return 0;
}
- if (p->p.loop == &main_birdloop)
- {
- /* Protocol is down for whatever reason. No need for locking. */
- BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) rejected (protocol is down)",
- sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
- sk->dport);
- rfree(sk);
- return 0;
- }
-
- BGP_ENTER(p);
+ birdloop_enter(p->p.loop);
/*
* BIRD should keep multiple incoming connections in OpenSent state (for
@@ -1319,8 +1371,7 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
if (!acc)
{
rfree(sk);
- BGP_LEAVE(p);
- return 0;
+ goto leave;
}
hops = p->cf->multihop ? : 1;
@@ -1345,22 +1396,24 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
p = bgp_spawn(p, sk->daddr);
p->postponed_sk = sk;
rmove(sk, p->p.pool);
- BGP_LEAVE(p);
- return 0;
+ goto leave;
}
rmove(sk, p->p.pool);
+ sk_reloop(sk, p->p.loop);
+
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, sk);
bgp_send_open(&p->incoming_conn);
- BGP_LEAVE(p);
- return 0;
+ goto leave;
err:
sk_log_error(sk, p->p.name);
log(L_ERR "%s: Incoming connection aborted", p->p.name);
rfree(sk);
- BGP_LEAVE(p);
+
+leave:
+ birdloop_leave(p->p.loop);
return 0;
}
@@ -1395,9 +1448,10 @@ bgp_neigh_notify(neighbor *n)
struct bgp_proto *p = (struct bgp_proto *) n->proto;
int ps = p->p.proto_state;
- BGP_ASSERT_INSIDE(p);
+ if (n != p->neigh)
+ return;
- if ((n != p->neigh) || (ps == PS_DOWN) || (ps == PS_STOP))
+ if ((ps == PS_DOWN) || (ps == PS_STOP))
return;
int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
@@ -1470,15 +1524,22 @@ static void
bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd)
{
if (bfd && p->bfd_req)
+ {
+ BGP_TRACE(D_EVENTS, "Updating existing BFD request");
bfd_update_request(p->bfd_req, bfd);
+ }
if (bfd && !p->bfd_req && !bgp_is_dynamic(p))
+ {
p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip,
p->cf->multihop ? NULL : p->neigh->iface,
- p->p.vrf, bgp_bfd_notify, p, birdloop_event_list(p->p.loop), bfd);
+ p->p.vrf, bgp_bfd_notify, p, p->p.loop, bfd);
+ BGP_TRACE(D_EVENTS, "Requesting a new BFD session");
+ }
if (!bfd && p->bfd_req)
{
+ BGP_TRACE(D_EVENTS, "Retracting the BFD request");
rfree(p->bfd_req);
p->bfd_req = NULL;
}
@@ -1490,8 +1551,11 @@ bgp_reload_routes(struct channel *C)
struct bgp_proto *p = (void *) C->proto;
struct bgp_channel *c = (void *) C;
- ASSERT(p->conn && (p->route_refresh));
+ /* Ignore non-BGP channels */
+ if (C->channel != &channel_bgp)
+ return;
+ ASSERT(p->conn && p->route_refresh);
bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
}
@@ -1501,6 +1565,10 @@ bgp_feed_begin(struct channel *C, int initial)
struct bgp_proto *p = (void *) C->proto;
struct bgp_channel *c = (void *) C;
+ /* Ignore non-BGP channels */
+ if (C->channel != &channel_bgp)
+ return;
+
/* This should not happen */
if (!p->conn)
return;
@@ -1508,6 +1576,12 @@ bgp_feed_begin(struct channel *C, int initial)
if (initial && p->cf->gr_mode)
c->feed_state = BFS_LOADING;
+ if (!initial && C->out_table)
+ {
+ c->feed_out_table = 1;
+ return;
+ }
+
/* It is refeed and both sides support enhanced route refresh */
if (!initial && p->enhanced_refresh)
{
@@ -1526,6 +1600,16 @@ bgp_feed_end(struct channel *C)
struct bgp_proto *p = (void *) C->proto;
struct bgp_channel *c = (void *) C;
+ /* Ignore non-BGP channels */
+ if (C->channel != &channel_bgp)
+ return;
+
+ if (c->feed_out_table)
+ {
+ c->feed_out_table = 0;
+ return;
+ }
+
/* This should not happen */
if (!p->conn)
return;
@@ -1548,17 +1632,14 @@ bgp_feed_end(struct channel *C)
static void
-bgp_start_locked(struct object_lock *lock)
+bgp_start_locked(void *_p)
{
- struct bgp_proto *p = lock->data;
+ struct bgp_proto *p = _p;
const struct bgp_config *cf = p->cf;
- BGP_ENTER(p);
-
if (p->p.proto_state != PS_START)
{
DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
- BGP_LEAVE(p);
return;
}
@@ -1568,11 +1649,10 @@ bgp_start_locked(struct object_lock *lock)
{
/* Multi-hop sessions do not use neighbor entries */
bgp_initiate(p);
- BGP_LEAVE(p);
return;
}
- neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY | NEF_NOTIFY_MAIN);
+ neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY);
if (!n)
{
log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface);
@@ -1580,7 +1660,6 @@ bgp_start_locked(struct object_lock *lock)
p->p.disabled = 1;
bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
proto_notify_state(&p->p, PS_DOWN);
- BGP_LEAVE(p);
return;
}
@@ -1592,8 +1671,6 @@ bgp_start_locked(struct object_lock *lock)
BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
else
bgp_start_neighbor(p);
-
- BGP_LEAVE(p);
}
static int
@@ -1632,13 +1709,12 @@ bgp_start(struct proto *P)
p->stats.rx_bytes = p->stats.tx_bytes = 0;
p->last_rx_update = 0;
- p->active_event = ev_new_init(p->p.pool, bgp_active_event, p);
- p->down_event = ev_new_init(p->p.pool, bgp_down_event, p);
+ p->event = ev_new_init(p->p.pool, bgp_decision, p);
+ p->uncork_ev = ev_new_init(p->p.pool, bgp_uncork, p);
+
p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
- p->rx_lp = lp_new_default(p->p.pool);
-
p->local_id = proto_get_router_id(P->cf);
if (p->rr_client)
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
@@ -1650,7 +1726,7 @@ bgp_start(struct proto *P)
if (p->p.gr_recovery && p->cf->gr_mode)
{
struct bgp_channel *c;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
channel_graceful_restart_lock(&c->c);
}
@@ -1665,8 +1741,11 @@ bgp_start(struct proto *P)
lock->iface = p->cf->iface;
lock->vrf = p->cf->iface ? NULL : p->p.vrf;
lock->type = OBJLOCK_TCP;
- lock->hook = bgp_start_locked;
- lock->data = p;
+ lock->event = (event) {
+ .hook = bgp_start_locked,
+ .data = p,
+ };
+ lock->target = proto_event_list(P);
/* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */
if (bgp_is_dynamic(p))
@@ -1782,7 +1861,7 @@ bgp_init(struct proto_config *CF)
P->rt_notify = bgp_rt_notify;
P->preexport = bgp_preexport;
- P->neigh_notify = bgp_neigh_notify;
+ P->iface_sub.neigh_notify = bgp_neigh_notify;
P->reload_routes = bgp_reload_routes;
P->feed_begin = bgp_feed_begin;
P->feed_end = bgp_feed_end;
@@ -1809,7 +1888,7 @@ bgp_init(struct proto_config *CF)
/* Add all channels */
struct bgp_channel_config *cc;
- WALK_LIST(cc, CF->channels)
+ BGP_CF_WALK_CHANNELS(cf, cc)
proto_add_channel(P, &cc->c);
return P;
@@ -1830,6 +1909,9 @@ bgp_channel_init(struct channel *C, struct channel_config *CF)
if (cf->igp_table_ip6)
c->igp_table_ip6 = cf->igp_table_ip6->table;
+
+ if (cf->base_table)
+ c->base_table = cf->base_table->table;
}
static int
@@ -1840,22 +1922,23 @@ bgp_channel_start(struct channel *C)
ip_addr src = p->local_ip;
if (c->igp_table_ip4)
- RT_LOCKED(c->igp_table_ip4, t)
- rt_lock_table(t);
+ rt_lock_table(c->igp_table_ip4);
if (c->igp_table_ip6)
- RT_LOCKED(c->igp_table_ip6, t)
- rt_lock_table(t);
+ rt_lock_table(c->igp_table_ip6);
- c->pool = p->p.pool; // XXXX
- bgp_init_bucket_table(c);
- bgp_init_prefix_table(c);
+ if (c->base_table)
+ {
+ rt_lock_table(c->base_table);
+ rt_flowspec_link(c->base_table, c->c.table);
+ }
- if (c->cf->import_table)
- channel_setup_in_table(C, 0);
+ c->pool = p->p.pool; // XXXX
if (c->cf->export_table)
- channel_setup_out_table(C);
+ bgp_setup_out_table(c);
+
+ bgp_init_pending_tx(c);
c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
@@ -1926,12 +2009,16 @@ bgp_channel_cleanup(struct channel *C)
struct bgp_channel *c = (void *) C;
if (c->igp_table_ip4)
- RT_LOCKED(c->igp_table_ip4, t)
- rt_unlock_table(t);
+ rt_unlock_table(c->igp_table_ip4);
if (c->igp_table_ip6)
- RT_LOCKED(c->igp_table_ip6, t)
- rt_unlock_table(t);
+ rt_unlock_table(c->igp_table_ip6);
+
+ if (c->base_table)
+ {
+ rt_flowspec_unlink(c->base_table, c->c.table);
+ rt_unlock_table(c->base_table);
+ }
c->index = 0;
@@ -1944,7 +2031,7 @@ bgp_find_channel_config(struct bgp_config *cf, u32 afi)
{
struct bgp_channel_config *cc;
- WALK_LIST(cc, cf->c.channels)
+ BGP_CF_WALK_CHANNELS(cf, cc)
if (cc->afi == afi)
return cc;
@@ -1974,12 +2061,31 @@ bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32
return cc2->c.table;
/* Last, try default table of given type */
- if (tab = cf->c.global->def_tables[type])
+ if (tab = rt_get_default_table(cf->c.global, type))
return tab;
cf_error("Undefined IGP table");
}
+static struct rtable_config *
+bgp_default_base_table(struct bgp_config *cf, struct bgp_channel_config *cc)
+{
+ /* Expected table type */
+ u32 type = (cc->afi == BGP_AF_FLOW4) ? NET_IP4 : NET_IP6;
+
+ /* First, try appropriate IP channel */
+ u32 afi2 = BGP_AF(BGP_AFI(cc->afi), BGP_SAFI_UNICAST);
+ struct bgp_channel_config *cc2 = bgp_find_channel_config(cf, afi2);
+ if (cc2 && (cc2->c.table->addr_type == type))
+ return cc2->c.table;
+
+ /* Last, try default table of given type */
+ struct rtable_config *tab = rt_get_default_table(cf->c.global, type);
+ if (tab)
+ return tab;
+
+ cf_error("Undefined base table");
+}
void
bgp_postconfig(struct proto_config *CF)
@@ -2042,6 +2148,15 @@ bgp_postconfig(struct proto_config *CF)
if (internal && cf->rs_client)
cf_error("Only external neighbor can be RS client");
+ if (internal && (cf->local_role != BGP_ROLE_UNDEFINED))
+ cf_error("Local role cannot be set on IBGP sessions");
+
+ if (interior && (cf->local_role != BGP_ROLE_UNDEFINED))
+ log(L_WARN "BGP roles are not recommended to be used within AS confederations");
+
+ if (cf->require_roles && (cf->local_role == BGP_ROLE_UNDEFINED))
+ cf_error("Local role must be set if roles are required");
+
if (!cf->confederation && cf->confederation_member)
cf_error("Confederation ID must be set for member sessions");
@@ -2064,9 +2179,24 @@ bgp_postconfig(struct proto_config *CF)
if (internal && cf->enforce_first_as)
cf_error("Enforce first AS check is requires EBGP sessions");
+ if (cf->keepalive_time > cf->hold_time)
+ cf_error("Keepalive time must be at most hold time");
+
+ if (cf->keepalive_time > (cf->hold_time / 2))
+ log(L_WARN "Keepalive time should be at most 1/2 of hold time");
+
+ if (cf->min_hold_time > cf->hold_time)
+ cf_error("Min hold time (%u) exceeds hold time (%u)",
+ cf->min_hold_time, cf->hold_time);
+
+ uint keepalive_time = cf->keepalive_time ?: cf->hold_time / 3;
+ if (cf->min_keepalive_time > keepalive_time)
+ cf_error("Min keepalive time (%u) exceeds keepalive time (%u)",
+ cf->min_keepalive_time, keepalive_time);
+
struct bgp_channel_config *cc;
- WALK_LIST(cc, CF->channels)
+ BGP_CF_WALK_CHANNELS(cf, cc)
{
/* Handle undefined import filter */
if (cc->c.in_filter == FILTER_UNDEF)
@@ -2094,6 +2224,10 @@ bgp_postconfig(struct proto_config *CF)
if (!cc->gw_mode)
cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
+ /* Different default for next_hop_prefer */
+ if (!cc->next_hop_prefer)
+ cc->next_hop_prefer = (cc->gw_mode == GW_DIRECT) ? NHP_GLOBAL : NHP_LOCAL;
+
/* Defaults based on proto config */
if (cc->gr_able == 0xff)
cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
@@ -2124,6 +2258,14 @@ bgp_postconfig(struct proto_config *CF)
cf_error("Mismatched IGP table type");
}
+ /* Default value of base table */
+ if ((BGP_SAFI(cc->afi) == BGP_SAFI_FLOW) && cc->validate && !cc->base_table)
+ cc->base_table = bgp_default_base_table(cf, cc);
+
+ if (cc->base_table && !cc->base_table->trie_used)
+ cf_error("Flowspec validation requires base table (%s) with trie",
+ cc->base_table->name);
+
if (cf->multihop && (cc->gw_mode == GW_DIRECT))
cf_error("Multihop BGP cannot use direct gateway mode");
@@ -2165,20 +2307,16 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF)
WALK_LIST(C, p->p.channels)
C->stale = 1;
- WALK_LIST(cc, new->c.channels)
+ BGP_CF_WALK_CHANNELS(new, cc)
{
C = (struct channel *) bgp_find_channel(p, cc->afi);
same = proto_configure_channel(P, &C, &cc->c) && same;
-
- if (C)
- C->stale = 0;
}
WALK_LIST_DELSAFE(C, C2, p->p.channels)
if (C->stale)
same = proto_configure_channel(P, &C, NULL) && same;
-
if (same && (p->start_state > BSS_PREPARE))
bgp_update_bfd(p, new->bfd);
@@ -2192,7 +2330,7 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF)
return same;
}
-#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL )
+#define TABLE(cf, NAME) ((cf)->NAME ? (cf)->NAME->table : NULL )
static int
bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *import_changed, int *export_changed)
@@ -2203,29 +2341,33 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor
struct bgp_channel_config *old = c->cf;
if ((new->secondary != old->secondary) ||
+ (new->validate != old->validate) ||
(new->gr_able != old->gr_able) ||
(new->llgr_able != old->llgr_able) ||
(new->llgr_time != old->llgr_time) ||
(new->ext_next_hop != old->ext_next_hop) ||
(new->add_path != old->add_path) ||
- (new->import_table != old->import_table) ||
(new->export_table != old->export_table) ||
- (IGP_TABLE(new, ip4) != IGP_TABLE(old, ip4)) ||
- (IGP_TABLE(new, ip6) != IGP_TABLE(old, ip6)))
+ (TABLE(new, igp_table_ip4) != TABLE(old, igp_table_ip4)) ||
+ (TABLE(new, igp_table_ip6) != TABLE(old, igp_table_ip6)) ||
+ (TABLE(new, base_table) != TABLE(old, base_table)))
return 0;
if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP))
return 0;
if ((new->gw_mode != old->gw_mode) ||
+ (new->next_hop_prefer != old->next_hop_prefer) ||
(new->aigp != old->aigp) ||
(new->cost != old->cost))
{
- /* import_changed itself does not force ROUTE_REFRESH when import_table is active */
- if (c->c.in_table && (c->c.channel_state == CS_UP))
+ /* If import table is active and route refresh is possible, we just ask for route refresh */
+ if ((c->c.in_keep & RIK_PREFILTER) && (c->c.channel_state == CS_UP) && p->route_refresh)
bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
- *import_changed = 1;
+ /* Otherwise we do complete reload */
+ else
+ *import_changed = 1;
}
if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) ||
@@ -2394,6 +2536,15 @@ bgp_show_afis(int code, char *s, u32 *afis, uint count)
cli_msg(code, b.start);
}
+const char *
+bgp_format_role_name(u8 role)
+{
+ static const char *bgp_role_names[] = { "provider", "rs_server", "rs_client", "customer", "peer" };
+ if (role == BGP_ROLE_UNDEFINED) return "undefined";
+ if (role < ARRAY_SIZE(bgp_role_names)) return bgp_role_names[role];
+ return "?";
+}
+
static void
bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
{
@@ -2522,6 +2673,9 @@ bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
if (caps->hostname)
cli_msg(-1006, " Hostname: %s", caps->hostname);
+
+ if (caps->role != BGP_ROLE_UNDEFINED)
+ cli_msg(-1006, " Role: %s", bgp_format_role_name(caps->role));
}
static void
@@ -2529,9 +2683,6 @@ bgp_show_proto_info(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
- if (p->p.proto_state != PS_DOWN)
- BGP_ASSERT_INSIDE(p);
-
cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
if (bgp_is_dynamic(p) && p->cf->remote_range)
@@ -2539,6 +2690,9 @@ bgp_show_proto_info(struct proto *P)
else
cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface);
+ if ((p->conn == &p->outgoing_conn) && (p->cf->remote_port != BGP_PORT))
+ cli_msg(-1006, " Neighbor port: %u", p->cf->remote_port);
+
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
cli_msg(-1006, " Local AS: %u", p->cf->local_as);
@@ -2581,6 +2735,9 @@ bgp_show_proto_info(struct proto *P)
tm_remains(p->conn->hold_timer), p->conn->hold_time);
cli_msg(-1006, " Keepalive timer: %t/%u",
tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
+ cli_msg(-1006, " TX pending: %d bytes%s",
+ p->conn->sk->tpos - p->conn->sk->ttx,
+ ev_active(p->conn->tx_ev) ? " (refill scheduled)" : "");
}
#if 0
@@ -2609,6 +2766,9 @@ bgp_show_proto_info(struct proto *P)
{
channel_show_info(&c->c);
+ if (c->c.channel != &channel_bgp)
+ continue;
+
if (p->gr_active_num)
cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]);
@@ -2628,6 +2788,25 @@ bgp_show_proto_info(struct proto *P)
if (c->igp_table_ip6)
cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name);
+
+ if (c->base_table)
+ cli_msg(-1006, " Base table: %s", c->base_table->name);
+
+ uint bucket_cnt = 0;
+ uint prefix_cnt = 0;
+ struct bgp_bucket *buck;
+ struct bgp_prefix *px;
+ if (c->ptx)
+ WALK_LIST(buck, c->ptx->bucket_queue)
+ {
+ bucket_cnt++;
+ WALK_LIST(px, buck->prefixes)
+ if (px->cur)
+ prefix_cnt++;
+ }
+
+ cli_msg(-1006, " Pending %u attribute sets with total %u prefixes to send",
+ bucket_cnt, prefix_cnt);
}
}
}
@@ -2645,7 +2824,6 @@ struct channel_class channel_bgp = {
struct protocol proto_bgp = {
.name = "BGP",
.template = "bgp%d",
- .class = PROTOCOL_BGP,
.preference = DEF_PREF_BGP,
.channel_mask = NB_IP | NB_VPN | NB_FLOW,
.proto_size = sizeof(struct bgp_proto),
@@ -2657,6 +2835,12 @@ struct protocol proto_bgp = {
.reconfigure = bgp_reconfigure,
.copy_config = bgp_copy_config,
.get_status = bgp_get_status,
- .get_attr = bgp_get_attr,
.show_proto_info = bgp_show_proto_info
};
+
+void bgp_build(void)
+{
+ proto_build(&proto_bgp);
+ bgp_register_attrs();
+ bgp_listen_domain = DOMAIN_NEW(rtable, "BGP Listen Sockets");
+}
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 2a2fe689..6f75874f 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -14,13 +14,12 @@
#include <stdint.h>
#include <setjmp.h>
#include "nest/bird.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/bfd.h"
//#include "lib/lists.h"
#include "lib/hash.h"
#include "lib/socket.h"
-struct linpool;
struct eattr;
@@ -68,10 +67,10 @@ struct bgp_af_desc {
u8 no_igp;
const char *name;
uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size);
- void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
+ void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a);
void (*update_next_hop)(struct bgp_export_state *s, eattr *nh, ea_list **to);
uint (*encode_next_hop)(struct bgp_write_state *s, eattr *nh, byte *buf, uint size);
- void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
+ void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, ea_list **to);
};
@@ -98,7 +97,7 @@ struct bgp_config {
int capabilities; /* Enable capability handshake [RFC 5492] */
int enable_refresh; /* Enable local support for route refresh [RFC 2918] */
int enable_as4; /* Enable local support for 4B AS numbers [RFC 6793] */
- int enable_extended_messages; /* Enable local support for extended messages [draft] */
+ int enable_extended_messages; /* Enable local support for extended messages [RFC 8654] */
int enable_hostname; /* Enable local support for hostname [draft] */
u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */
int rr_client; /* Whether neighbor is RR client of me */
@@ -109,18 +108,24 @@ struct bgp_config {
int interpret_communities; /* Hardwired handling of well-known communities */
int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */
int allow_local_pref; /* Allow LOCAL_PREF in EBGP sessions */
+ int allow_med; /* Allow BGP_MED in EBGP sessions */
int allow_as_sets; /* Allow AS_SETs in incoming AS_PATHs */
int enforce_first_as; /* Enable check for neighbor AS as first AS in AS_PATH */
int gr_mode; /* Graceful restart mode (BGP_GR_*) */
int llgr_mode; /* Long-lived graceful restart mode (BGP_LLGR_*) */
int setkey; /* Set MD5 password to system SA/SP database */
+ u8 local_role; /* Set peering role with neighbor [RFC 9234] */
+ int require_roles; /* Require configured roles on both sides */
/* Times below are in seconds */
unsigned gr_time; /* Graceful restart timeout */
unsigned llgr_time; /* Long-lived graceful restart stale time */
unsigned connect_delay_time; /* Minimum delay between connect attempts */
unsigned connect_retry_time; /* Timeout for connect attempts */
- unsigned hold_time, initial_hold_time;
+ unsigned hold_time;
+ unsigned min_hold_time; /* Minimum accepted hold time */
+ unsigned initial_hold_time;
unsigned keepalive_time;
+ unsigned min_keepalive_time; /* Minimum accepted keepalive time */
unsigned error_amnesia_time; /* Errors are forgotten after */
unsigned error_delay_time_min; /* Time to wait after an error is detected */
unsigned error_delay_time_max;
@@ -144,9 +149,11 @@ struct bgp_channel_config {
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
u8 next_hop_self; /* Always set next hop to local IP address (NH_*) */
u8 next_hop_keep; /* Do not modify next hop attribute (NH_*) */
+ u8 next_hop_prefer; /* Prefer global or link-local next hop (NHP_*) */
u8 mandatory; /* Channel is mandatory in capability negotiation */
u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */
u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
+ u8 validate; /* Validate Flowspec per RFC 8955 (6) */
u8 gr_able; /* Allow full graceful restart for the channel */
u8 llgr_able; /* Allow full long-lived GR for the channel */
uint llgr_time; /* Long-lived graceful restart stale time */
@@ -156,15 +163,23 @@ struct bgp_channel_config {
u8 aigp_originate; /* AIGP is originated automatically */
u32 cost; /* IGP cost for direct next hops */
u8 import_table; /* Use c.in_table as Adj-RIB-In */
- u8 export_table; /* Use c.out_table as Adj-RIB-Out */
+ u8 export_table; /* Keep Adj-RIB-Out and export it */
struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
+ struct rtable_config *base_table; /* Base table for Flowspec validation */
};
#define BGP_PT_INTERNAL 1
#define BGP_PT_EXTERNAL 2
+#define BGP_ROLE_UNDEFINED 255
+#define BGP_ROLE_PROVIDER 0
+#define BGP_ROLE_RS_SERVER 1
+#define BGP_ROLE_RS_CLIENT 2
+#define BGP_ROLE_CUSTOMER 3
+#define BGP_ROLE_PEER 4
+
#define NH_NO 0
#define NH_ALL 1
#define NH_IBGP 2
@@ -177,6 +192,9 @@ struct bgp_channel_config {
#define GW_DIRECT 1
#define GW_RECURSIVE 2
+#define NHP_GLOBAL 1
+#define NHP_LOCAL 2
+
#define BGP_ADD_PATH_RX 1
#define BGP_ADD_PATH_TX 2
#define BGP_ADD_PATH_FULL 3
@@ -203,8 +221,6 @@ struct bgp_channel_config {
/* rte->pflags */
#define BGP_REF_SUPPRESSED 0x1 /* Used for deterministic MED comparison */
-#define BGP_REF_STALE 0x2 /* Route is LLGR_STATE */
-#define BGP_REF_NOT_STALE 0x4 /* Route is NOT LLGR_STATE */
struct bgp_af_caps {
u32 afi;
@@ -222,9 +238,10 @@ struct bgp_caps {
u32 as4_number; /* Announced ASN */
u8 as4_support; /* Four-octet AS capability, RFC 6793 */
- u8 ext_messages; /* Extended message length, RFC draft */
+ u8 ext_messages; /* Extended message length, RFC 8654 */
u8 route_refresh; /* Route refresh capability, RFC 2918 */
u8 enhanced_refresh; /* Enhanced route refresh, RFC 7313 */
+ u8 role; /* BGP role capability, RFC 9234 */
u8 gr_aware; /* Graceful restart capability, RFC 4724 */
u8 gr_flags; /* Graceful restart flags */
@@ -248,8 +265,8 @@ struct bgp_caps {
struct bgp_socket {
node n; /* Node in global bgp_sockets */
+ list requests; /* Listen requests */
sock *sk; /* Real listening socket */
- u32 uc; /* Use count */
};
struct bgp_stats {
@@ -284,6 +301,16 @@ struct bgp_conn {
uint hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
};
+struct bgp_listen_request {
+ node n; /* Node in bgp_socket / pending list */
+ struct bgp_socket *sock; /* Assigned socket */
+ ip_addr addr;
+ struct iface *iface;
+ struct iface *vrf;
+ uint port;
+ uint flags;
+};
+
struct bgp_proto {
struct proto p;
const struct bgp_config *cf; /* Shortcut to BGP configuration */
@@ -313,18 +340,17 @@ struct bgp_proto {
struct bgp_conn *conn; /* Connection we have established */
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
- struct linpool *rx_lp; /* Linpool for parsing received updates */
struct object_lock *lock; /* Lock for neighbor connection */
struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */
- struct bgp_socket *sock; /* Shared listening socket */
+ struct bgp_listen_request listen; /* Shared listening socket */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */
+ event *uncork_ev; /* Uncork event in case of congestion */
struct bgp_stats stats; /* BGP statistics */
btime last_established; /* Last time of enter/leave of established state */
btime last_rx_update; /* Last time of RX update */
ip_addr link_addr; /* Link-local version of local_ip */
- event *active_event; /* Event for respawning */
- event *down_event; /* Event to shut down */
+ event *event; /* Event for respawning and shutting process */
timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */
int dynamic_name_counter; /* Counter for dynamic BGP names */
@@ -347,15 +373,12 @@ struct bgp_channel {
rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
+ rtable *base_table; /* Base table for Flowspec validation */
/* Rest are zeroed when down */
pool *pool;
- HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
- struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
- list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
-
- HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
- slab *prefix_slab; /* Slab holding prefix nodes */
+ struct bgp_pending_tx *ptx; /* Routes waiting to be sent */
+ struct rt_exporter prefix_exporter; /* Table-like exporter for ptx */
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
ip_addr link_addr; /* Link-local version of next_hop_addr */
@@ -376,11 +399,16 @@ struct bgp_channel {
u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */
u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */
+
+ u8 feed_out_table; /* Refeed into out_table */
};
struct bgp_prefix {
- node buck_node; /* Node in per-bucket list */
+ node buck_node_xx; /* Node in per-bucket list */
struct bgp_prefix *next; /* Node in prefix hash table */
+ struct bgp_bucket *last; /* Last bucket sent with this prefix */
+ struct bgp_bucket *cur; /* Current bucket (cur == last) if no update is required */
+ btime lastmod; /* Last modification of this prefix */
u32 hash;
u32 path_id;
net_addr net[0];
@@ -389,11 +417,24 @@ struct bgp_prefix {
struct bgp_bucket {
node send_node; /* Node in send queue */
struct bgp_bucket *next; /* Node in bucket hash table */
- list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */
+ list prefixes; /* Prefixes to send in this bucket (struct bgp_prefix) */
u32 hash; /* Hash over extended attributes */
+ u32 px_uc; /* How many prefixes are linking this bucket */
ea_list eattrs[0]; /* Per-bucket extended attributes */
};
+struct bgp_pending_tx {
+ resource r;
+ pool *pool;
+
+ HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
+ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
+ list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
+
+ HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
+ slab *prefix_slab; /* Slab holding prefix nodes */
+};
+
struct bgp_export_state {
struct bgp_proto *proto;
struct bgp_channel *channel;
@@ -404,7 +445,7 @@ struct bgp_export_state {
int mpls;
u32 attrs_seen[1];
- uint err_withdraw;
+ uint err_reject;
uint local_next_hop;
};
@@ -430,6 +471,7 @@ struct bgp_parse_state {
int as4_session;
int add_path;
int mpls;
+ int reach_nlri_step;
u32 attrs_seen[256/32];
@@ -456,13 +498,12 @@ struct bgp_parse_state {
uint err_subcode;
jmp_buf err_jmpbuf;
- struct hostentry *hostentry;
adata *mpls_labels;
/* Cached state for bgp_rte_update() */
u32 last_id;
struct rte_src *last_src;
- rta *cached_rta;
+ ea_list *cached_ea;
};
#define BGP_PORT 179
@@ -475,6 +516,9 @@ struct bgp_parse_state {
#define BGP_RX_BUFFER_EXT_SIZE 65535
#define BGP_TX_BUFFER_EXT_SIZE 65535
+#define BGP_CF_WALK_CHANNELS(P,C) WALK_LIST(C, P->c.channels) if (C->c.channel == &channel_bgp)
+#define BGP_WALK_CHANNELS(P,C) WALK_LIST(C, P->p.channels) if (C->c.channel == &channel_bgp)
+
static inline int bgp_channel_is_ipv4(struct bgp_channel *c)
{ return BGP_AFI(c->afi) == BGP_AFI_IPV4; }
@@ -487,6 +531,12 @@ static inline int bgp_cc_is_ipv4(struct bgp_channel_config *c)
static inline int bgp_cc_is_ipv6(struct bgp_channel_config *c)
{ return BGP_AFI(c->afi) == BGP_AFI_IPV6; }
+static inline int bgp_channel_is_role_applicable(struct bgp_channel *c)
+{ return (c->afi == BGP_AF_IPV4 || c->afi == BGP_AF_IPV6); }
+
+static inline int bgp_cc_is_role_applicable(struct bgp_channel_config *c)
+{ return (c->afi == BGP_AF_IPV4 || c->afi == BGP_AF_IPV6); }
+
static inline uint bgp_max_packet_length(struct bgp_conn *conn)
{ return conn->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; }
@@ -513,14 +563,17 @@ void bgp_refresh_begin(struct bgp_channel *c);
void bgp_refresh_end(struct bgp_channel *c);
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len);
-
-struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
-struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
+const char *bgp_format_role_name(u8 role);
static inline int
-rta_resolvable(rta *a)
+rte_resolvable(const rte *rt)
{
- return a->dest == RTD_UNICAST;
+ eattr *nhea = ea_find(rt->attrs, &ea_gen_nexthop);
+ if (!nhea)
+ return 0;
+
+ struct nexthop_adata *nhad = (void *) nhea->u.ptr;
+ return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE);
}
extern struct rte_owner_class bgp_rte_owner_class;
@@ -539,63 +592,41 @@ extern struct rte_owner_class bgp_rte_owner_class;
/* attrs.c */
-static inline eattr *
-bgp_find_attr(ea_list *attrs, uint code)
-{
- return ea_find(attrs, EA_CODE(PROTOCOL_BGP, code));
-}
-
eattr *
-bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val);
-
-static inline void
-bgp_set_attr_u32(ea_list **to, struct linpool *pool, uint code, uint flags, u32 val)
-{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); }
-
-static inline void
-bgp_set_attr_ptr(ea_list **to, struct linpool *pool, uint code, uint flags, const struct adata *val)
-{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); }
+bgp_find_attr(ea_list *attrs, uint code);
-static inline void
-bgp_set_attr_data(ea_list **to, struct linpool *pool, uint code, uint flags, void *data, uint len)
-{
- struct adata *a = lp_alloc_adata(pool, len);
- bmemcpy(a->data, data, len);
- bgp_set_attr(to, pool, code, flags, (uintptr_t) a);
-}
-
-static inline void
-bgp_unset_attr(ea_list **to, struct linpool *pool, uint code)
-{ eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; }
+void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val);
+void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad);
+void bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len);
+void bgp_unset_attr(ea_list **to, uint code);
int bgp_encode_mp_reach_mrt(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end);
ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len);
-void bgp_finish_attrs(struct bgp_parse_state *s, rta *a);
+void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to);
+
+void bgp_setup_out_table(struct bgp_channel *c);
+
+void bgp_init_pending_tx(struct bgp_channel *c);
+void bgp_free_pending_tx(struct bgp_channel *c);
-void bgp_init_bucket_table(struct bgp_channel *c);
-void bgp_free_bucket_table(struct bgp_channel *c);
-void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b);
-void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b);
void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b);
+int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b);
-void bgp_init_prefix_table(struct bgp_channel *c);
-void bgp_free_prefix_table(struct bgp_channel *c);
-void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp);
+void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck);
-int bgp_rte_better(struct rte *, struct rte *);
-int bgp_rte_mergable(rte *pri, rte *sec);
-int bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best);
-void bgp_rte_modify_stale(struct rt_export_request *, const net_addr *, struct rt_pending_export *, rte **, uint);
-u32 bgp_rte_igp_metric(struct rte *);
+int bgp_rte_better(const rte *, const rte *);
+int bgp_rte_mergable(const rte *pri, const rte *sec);
+int bgp_rte_recalculate(struct rtable_private *table, net *net, rte *new, rte *old, rte *old_best);
+void bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first, struct rt_pending_export *last, const rte **feed, uint count);
+u32 bgp_rte_igp_metric(const rte *);
void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old);
int bgp_preexport(struct channel *, struct rte *);
-int bgp_get_attr(const struct eattr *e, byte *buf, int buflen);
-void bgp_get_route_info(struct rte *, byte *);
-int bgp_total_aigp_metric_(rta *a, u64 *metric, const struct adata **ad);
+void bgp_get_route_info(const rte *, byte *);
+int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad);
-static inline struct bgp_proto *bgp_rte_proto(struct rte *rte)
+static inline struct bgp_proto *bgp_rte_proto(const rte *rte)
{
return (rte->src->owner->class == &bgp_rte_owner_class) ?
SKIP_BACK(struct bgp_proto, p.sources, rte->src->owner) : NULL;
@@ -605,15 +636,17 @@ static inline struct bgp_proto *bgp_rte_proto(struct rte *rte)
#define BGP_AIGP_MAX U64(0xffffffffffffffff)
static inline u64
-bgp_total_aigp_metric(rta *a)
+bgp_total_aigp_metric(const rte *e)
{
u64 metric = BGP_AIGP_MAX;
const struct adata *ad;
- bgp_total_aigp_metric_(a, &metric, &ad);
+ bgp_total_aigp_metric_(e, &metric, &ad);
return metric;
}
+void bgp_register_attrs(void);
+
/* packets.c */
@@ -625,6 +658,7 @@ void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
void bgp_kick_tx(void *vconn);
void bgp_tx(struct birdsock *sk);
int bgp_rx(struct birdsock *sk, uint size);
+void bgp_uncork(void *vp);
const char * bgp_error_dsc(unsigned code, unsigned subcode);
void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len);
@@ -650,26 +684,32 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
#define BAF_DECODE_FLAGS 0x0100 /* Private flag - attribute flags are handled by the decode hook */
-#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */
-#define BA_AS_PATH 0x02 /* WM */
-#define BA_NEXT_HOP 0x03 /* WM */
-#define BA_MULTI_EXIT_DISC 0x04 /* ON */
-#define BA_LOCAL_PREF 0x05 /* WD */
-#define BA_ATOMIC_AGGR 0x06 /* WD */
-#define BA_AGGREGATOR 0x07 /* OT */
-#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */
-#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */
-#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */
-#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */
-#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */
-#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */
-#define BA_AS4_PATH 0x11 /* RFC 6793 */
-#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */
-#define BA_AIGP 0x1a /* RFC 7311 */
-#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */
+enum bgp_attr_id {
+ BA_ORIGIN = 0x01, /* RFC 4271 */ /* WM */
+ BA_AS_PATH = 0x02, /* WM */
+ BA_NEXT_HOP = 0x03, /* WM */
+ BA_MULTI_EXIT_DISC = 0x04, /* ON */
+ BA_LOCAL_PREF = 0x05, /* WD */
+ BA_ATOMIC_AGGR = 0x06, /* WD */
+ BA_AGGREGATOR = 0x07, /* OT */
+ BA_COMMUNITY = 0x08, /* RFC 1997 */ /* OT */
+ BA_ORIGINATOR_ID = 0x09, /* RFC 4456 */ /* ON */
+ BA_CLUSTER_LIST = 0x0a, /* RFC 4456 */ /* ON */
+ BA_MP_REACH_NLRI = 0x0e, /* RFC 4760 */
+ BA_MP_UNREACH_NLRI = 0x0f, /* RFC 4760 */
+ BA_EXT_COMMUNITY = 0x10, /* RFC 4360 */
+ BA_AS4_PATH = 0x11, /* RFC 6793 */
+ BA_AS4_AGGREGATOR = 0x12, /* RFC 6793 */
+ BA_AIGP = 0x1a, /* RFC 7311 */
+ BA_LARGE_COMMUNITY = 0x20, /* RFC 8092 */
+#define BA_ONLY_TO_CUSTOMER 0x23 /* RFC 9234 */
/* Bird's private internal BGP attributes */
-#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */
+ BA_MPLS_LABEL_STACK = 0x100, /* MPLS label stack transfer attribute */
+
+/* Maximum */
+ BGP_ATTR_MAX,
+};
/* BGP connection states */
@@ -762,10 +802,5 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
#define ORIGIN_EGP 1
#define ORIGIN_INCOMPLETE 2
-/* Loop */
-
-#define BGP_ENTER(bgp) birdloop_enter(bgp->p.loop)
-#define BGP_LEAVE(bgp) birdloop_leave(bgp->p.loop)
-#define BGP_ASSERT_INSIDE(bgp) ASSERT_DIE((bgp->p.loop != &main_birdloop) && birdloop_inside(bgp->p.loop))
#endif
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 7c0d8d65..a37bb27a 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -19,19 +19,19 @@ CF_DECLS
CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR,
- START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH,
- BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR,
- BGP_AGGREGATOR, BGP_COMMUNITY, BGP_EXT_COMMUNITY, BGP_LARGE_COMMUNITY,
+ START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER,
+ BGP_LOCAL_PREF, BGP_MED,
SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE,
IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR,
- DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID,
- BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL,
+ DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES,
+ IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL,
SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX,
GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY,
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS,
- DYNAMIC, RANGE, NAME, DIGITS, BGP_AIGP, AIGP, ORIGINATE, COST, ENFORCE,
- FIRST, FREE)
+ DYNAMIC, RANGE, NAME, DIGITS, AIGP, ORIGINATE, COST, ENFORCE,
+ FIRST, FREE, VALIDATE, BASE, ROLE, ROLES, PEER, PROVIDER, CUSTOMER,
+ RS_SERVER, RS_CLIENT, REQUIRE, BGP_OTC, GLOBAL)
%type <i> bgp_nh
%type <i32> bgp_afi
@@ -40,10 +40,14 @@ CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER,
CONFIGURATION, CHANGE, DECONFIGURED, CONNECTION, REJECTED, COLLISION,
OUT, OF, RESOURCES)
-%type<i> bgp_cease_mask bgp_cease_list bgp_cease_flag
+%type<i> bgp_cease_mask bgp_cease_list bgp_cease_flag bgp_role_name
CF_GRAMMAR
+/* Workaround for collisions between keywords and symbols */
+toksym: ROLE | PEER | PROVIDER | CUSTOMER | RS_SERVER | RS_CLIENT ;
+toksym: BGP_MED | BGP_LOCAL_PREF | SOURCE ;
+
proto: bgp_proto '}' ;
bgp_proto_start: proto_start BGP {
@@ -73,6 +77,7 @@ bgp_proto_start: proto_start BGP {
BGP_CFG->llgr_mode = -1;
BGP_CFG->llgr_time = 3600;
BGP_CFG->setkey = 1;
+ BGP_CFG->local_role = BGP_ROLE_UNDEFINED;
BGP_CFG->dynamic_name = "dynbgp";
BGP_CFG->check_link = -1;
}
@@ -115,6 +120,14 @@ bgp_cease_flag:
| OUT OF RESOURCES { $$ = 1 << 8; }
;
+bgp_role_name:
+ PEER { $$ = BGP_ROLE_PEER; }
+ | PROVIDER { $$ = BGP_ROLE_PROVIDER; }
+ | CUSTOMER { $$ = BGP_ROLE_CUSTOMER; }
+ | RS_SERVER { $$ = BGP_ROLE_RS_SERVER; }
+ | RS_CLIENT { $$ = BGP_ROLE_RS_CLIENT; }
+ ;
+
bgp_proto:
bgp_proto_start proto_name '{'
| bgp_proto proto_item ';'
@@ -144,7 +157,8 @@ bgp_proto:
| bgp_proto RS CLIENT bool ';' { BGP_CFG->rs_client = $4; }
| bgp_proto CONFEDERATION expr ';' { BGP_CFG->confederation = $3; }
| bgp_proto CONFEDERATION MEMBER bool ';' { BGP_CFG->confederation_member = $4; }
- | bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; }
+ | bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; if (($4 && $4<3) || ($4>65535)) cf_error("Hold time must be in range 3-65535 or zero"); }
+ | bgp_proto MIN HOLD TIME expr ';' { BGP_CFG->min_hold_time = $5; }
| bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; }
| bgp_proto DIRECT ';' { BGP_CFG->multihop = 0; }
| bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; }
@@ -168,7 +182,8 @@ bgp_proto:
| bgp_proto START DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; log(L_WARN "%s: Start delay time option is deprecated, use connect delay time", this_proto->name); }
| bgp_proto CONNECT DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; }
| bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; }
- | bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; }
+ | bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; if (($4<1) || ($4>65535)) cf_error("Keepalive time must be in range 1-65535"); }
+ | bgp_proto MIN KEEPALIVE TIME expr ';' { BGP_CFG->min_keepalive_time = $5; }
| bgp_proto ERROR FORGET TIME expr ';' { BGP_CFG->error_amnesia_time = $5; }
| bgp_proto ERROR WAIT TIME expr ',' expr ';' { BGP_CFG->error_delay_time_min = $5; BGP_CFG->error_delay_time_max = $7; }
| bgp_proto DISABLE AFTER ERROR bool ';' { BGP_CFG->disable_after_error = $5; }
@@ -185,6 +200,7 @@ bgp_proto:
| bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; }
| bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; }
| bgp_proto ALLOW BGP_LOCAL_PREF bool ';' { BGP_CFG->allow_local_pref = $4; }
+ | bgp_proto ALLOW BGP_MED bool ';' { BGP_CFG->allow_med = $4; }
| bgp_proto ALLOW AS SETS bool ';' { BGP_CFG->allow_as_sets = $5; }
| bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; }
| bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; }
@@ -198,6 +214,8 @@ bgp_proto:
| bgp_proto BFD GRACEFUL ';' { init_bfd_opts(&BGP_CFG->bfd); BGP_CFG->bfd->mode = BGP_BFD_GRACEFUL; }
| bgp_proto BFD { open_bfd_opts(&BGP_CFG->bfd); } bfd_opts { close_bfd_opts(); } ';'
| bgp_proto ENFORCE FIRST AS bool ';' { BGP_CFG->enforce_first_as = $5; }
+ | bgp_proto LOCAL ROLE bgp_role_name ';' { BGP_CFG->local_role = $4; }
+ | bgp_proto REQUIRE ROLES bool ';' { BGP_CFG->require_roles = $4; }
;
bgp_afi:
@@ -252,11 +270,17 @@ bgp_channel_item:
| NEXT HOP ADDRESS ipa { BGP_CC->next_hop_addr = $4; }
| NEXT HOP SELF bgp_nh { BGP_CC->next_hop_self = $4; }
| NEXT HOP KEEP bgp_nh { BGP_CC->next_hop_keep = $4; }
+ | NEXT HOP PREFER GLOBAL { BGP_CC->next_hop_prefer = NHP_GLOBAL; }
| MANDATORY bool { BGP_CC->mandatory = $2; }
| MISSING LLADDR bgp_lladdr { log(L_WARN "%s.%s: Missing lladdr option is deprecated and ignored, remove it", this_proto->name, this_channel->name); }
| GATEWAY DIRECT { BGP_CC->gw_mode = GW_DIRECT; }
| GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; }
| SECONDARY bool { BGP_CC->secondary = $2; }
+ | VALIDATE bool {
+ BGP_CC->validate = $2;
+ if (BGP_SAFI(BGP_CC->afi) != BGP_SAFI_FLOW)
+ cf_error("Validate option limited to flowspec channels");
+ }
| GRACEFUL RESTART bool { BGP_CC->gr_able = $3; }
| LONG LIVED GRACEFUL RESTART bool { BGP_CC->llgr_able = $5; }
| LONG LIVED STALE TIME expr { BGP_CC->llgr_time = $5; }
@@ -280,6 +304,16 @@ bgp_channel_item:
else
cf_error("Mismatched IGP table type");
}
+ | BASE TABLE rtable {
+ if (BGP_SAFI(BGP_CC->afi) != BGP_SAFI_FLOW)
+ cf_error("Base table option limited to flowspec channels");
+
+ if (((BGP_CC->afi == BGP_AF_FLOW4) && ($3->addr_type == NET_IP4)) ||
+ ((BGP_CC->afi == BGP_AF_FLOW6) && ($3->addr_type == NET_IP6)))
+ BGP_CC->base_table = $3;
+ else
+ cf_error("Mismatched base table type");
+ }
;
bgp_channel_opts:
@@ -297,41 +331,14 @@ bgp_channel_end:
if (!this_channel->table)
cf_error("Routing table not specified");
+ if (BGP_CC->import_table)
+ this_channel->in_keep |= RIK_PREFILTER;
+
this_channel = NULL;
};
bgp_proto_channel: bgp_channel_start bgp_channel_opt_list bgp_channel_end;
-
-dynamic_attr: BGP_ORIGIN
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_BGP_ORIGIN, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); } ;
-dynamic_attr: BGP_PATH
- { $$ = f_new_dynamic_attr(EAF_TYPE_AS_PATH, T_PATH, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); } ;
-dynamic_attr: BGP_NEXT_HOP
- { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_CODE(PROTOCOL_BGP, BA_NEXT_HOP)); } ;
-dynamic_attr: BGP_MED
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); } ;
-dynamic_attr: BGP_LOCAL_PREF
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); } ;
-dynamic_attr: BGP_ATOMIC_AGGR
- { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_ATOMIC_AGGR)); } ;
-dynamic_attr: BGP_AGGREGATOR
- { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AGGREGATOR)); } ;
-dynamic_attr: BGP_COMMUNITY
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); } ;
-dynamic_attr: BGP_ORIGINATOR_ID
- { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); } ;
-dynamic_attr: BGP_CLUSTER_LIST
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); } ;
-dynamic_attr: BGP_EXT_COMMUNITY
- { $$ = f_new_dynamic_attr(EAF_TYPE_EC_SET, T_ECLIST, EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)); } ;
-dynamic_attr: BGP_AIGP
- { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AIGP)); } ;
-dynamic_attr: BGP_LARGE_COMMUNITY
- { $$ = f_new_dynamic_attr(EAF_TYPE_LC_SET, T_LCLIST, EA_CODE(PROTOCOL_BGP, BA_LARGE_COMMUNITY)); } ;
-
-
-
CF_ENUM(T_ENUM_BGP_ORIGIN, ORIGIN_, IGP, EGP, INCOMPLETE)
CF_CODE
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 88b66040..978a1891 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -15,8 +15,8 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
-#include "nest/attrs.h"
+#include "nest/rt.h"
+#include "lib/attrs.h"
#include "proto/mrt/mrt.h"
#include "conf/conf.h"
#include "lib/unaligned.h"
@@ -238,6 +238,7 @@ bgp_prepare_capabilities(struct bgp_conn *conn)
caps->ext_messages = p->cf->enable_extended_messages;
caps->route_refresh = p->cf->enable_refresh;
caps->enhanced_refresh = p->cf->enable_refresh;
+ caps->role = p->cf->local_role;
if (caps->as4_support)
caps->as4_number = p->public_as;
@@ -261,7 +262,7 @@ bgp_prepare_capabilities(struct bgp_conn *conn)
}
/* Allocate and fill per-AF fields */
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
ac = &caps->af_data[caps->af_count++];
ac->afi = c->afi;
@@ -350,6 +351,13 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
*buf++ = 0; /* Capability data length */
}
+ if (caps->role != BGP_ROLE_UNDEFINED)
+ {
+ *buf++ = 9; /* Capability 9: Announce chosen BGP role */
+ *buf++ = 1; /* Capability data length */
+ *buf++ = caps->role;
+ }
+
if (caps->gr_aware)
{
*buf++ = 64; /* Capability 64: Support for graceful restart */
@@ -449,11 +457,15 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
struct bgp_proto *p = conn->bgp;
struct bgp_caps *caps;
struct bgp_af_caps *ac;
+ uint err_subcode = 0;
int i, cl;
u32 af;
if (!conn->remote_caps)
+ {
caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + sizeof(struct bgp_af_caps));
+ caps->role = BGP_ROLE_UNDEFINED;
+ }
else
{
caps = conn->remote_caps;
@@ -506,13 +518,28 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
}
break;
- case 6: /* Extended message length capability, RFC draft */
+ case 6: /* Extended message length capability, RFC 8654 */
if (cl != 0)
goto err;
caps->ext_messages = 1;
break;
+ case 9: /* BGP role capability, RFC 9234 */
+ if (cl != 1)
+ goto err;
+
+ /* Reserved value */
+ if (pos[2] == BGP_ROLE_UNDEFINED)
+ { err_subcode = 11; goto err; }
+
+ /* Multiple inconsistent values */
+ if ((caps->role != BGP_ROLE_UNDEFINED) && (caps->role != pos[2]))
+ { err_subcode = 11; goto err; }
+
+ caps->role = pos[2];
+ break;
+
case 64: /* Graceful restart capability, RFC 4724 */
if (cl % 4 != 2)
goto err;
@@ -638,7 +665,7 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
err:
mb_free(caps);
- bgp_error(conn, 2, 0, NULL, 0);
+ bgp_error(conn, 2, err_subcode, NULL, 0);
return -1;
}
@@ -654,7 +681,7 @@ bgp_check_capabilities(struct bgp_conn *conn)
/* This is partially overlapping with bgp_conn_enter_established_state(),
but we need to run this just after we receive OPEN message */
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
const struct bgp_af_caps *rem = bgp_find_af_caps(remote, c->afi);
@@ -684,7 +711,7 @@ bgp_read_options(struct bgp_conn *conn, byte *pos, uint len, uint rest)
struct bgp_proto *p = conn->bgp;
int ext = 0;
- /* Handle extended length (draft-ietf-idr-ext-opt-param-07) */
+ /* Handle extended length, RFC 9072 */
if ((len > 0) && (rest > 0) && (pos[0] == 255))
{
if (rest < 3)
@@ -769,7 +796,7 @@ bgp_create_open(struct bgp_conn *conn, byte *buf)
buf[10] = 2; /* Option 2: Capability list */
buf[11] = len; /* Option data length */
}
- else /* draft-ietf-idr-ext-opt-param-07 */
+ else /* Extended length, RFC 9072 */
{
/* Move capabilities 4 B forward */
memmove(buf + 16, pos, len);
@@ -820,9 +847,25 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
if (bgp_read_options(conn, pkt+29, pkt[28], len-29) < 0)
return;
+ /* RFC 4271 4.2 - hold time must be either 0 or at least 3 */
if (hold > 0 && hold < 3)
{ bgp_error(conn, 2, 6, pkt+22, 2); return; }
+ /* Compute effective hold and keepalive times */
+ uint hold_time = MIN(hold, p->cf->hold_time);
+ uint keepalive_time = p->cf->keepalive_time ?
+ (p->cf->keepalive_time * hold_time / p->cf->hold_time) :
+ hold_time / 3;
+
+ /* Keepalive time might be rounded down to zero */
+ if (hold_time && !keepalive_time)
+ keepalive_time = 1;
+
+ /* Check effective values against configured minimums */
+ if ((hold_time < p->cf->min_hold_time) ||
+ (keepalive_time < p->cf->min_keepalive_time))
+ { bgp_error(conn, 2, 6, pkt+22, 2); return; }
+
/* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
if (!id || (p->is_internal && id == p->local_id))
{ bgp_error(conn, 2, 3, pkt+24, -4); return; }
@@ -854,6 +897,22 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
conn->received_as = asn;
}
+ /* RFC 9234 4.2 - check role agreement */
+ u8 local_role = p->cf->local_role;
+ u8 neigh_role = caps->role;
+
+ if ((local_role != BGP_ROLE_UNDEFINED) &&
+ (neigh_role != BGP_ROLE_UNDEFINED) &&
+ !((local_role == BGP_ROLE_PEER && neigh_role == BGP_ROLE_PEER) ||
+ (local_role == BGP_ROLE_CUSTOMER && neigh_role == BGP_ROLE_PROVIDER) ||
+ (local_role == BGP_ROLE_PROVIDER && neigh_role == BGP_ROLE_CUSTOMER) ||
+ (local_role == BGP_ROLE_RS_CLIENT && neigh_role == BGP_ROLE_RS_SERVER) ||
+ (local_role == BGP_ROLE_RS_SERVER && neigh_role == BGP_ROLE_RS_CLIENT)))
+ { bgp_error(conn, 2, 11, &neigh_role, -1); return; }
+
+ if ((p->cf->require_roles) && (neigh_role == BGP_ROLE_UNDEFINED))
+ { bgp_error(conn, 2, 11, &neigh_role, -1); return; }
+
/* Check the other connection */
other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
switch (other->state)
@@ -904,8 +963,8 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
}
/* Update our local variables */
- conn->hold_time = MIN(hold, p->cf->hold_time);
- conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
+ conn->hold_time = hold_time;
+ conn->keepalive_time = keepalive_time;
conn->as4_session = conn->local_caps->as4_support && caps->as4_support;
conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages;
p->remote_id = id;
@@ -932,14 +991,18 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
#define WITHDRAW(msg, args...) \
({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
+#define REJECT(msg, args...) \
+ ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; })
+
#define BAD_AFI "Unexpected AF <%u/%u> in UPDATE"
#define BAD_NEXT_HOP "Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP "Missing NEXT_HOP attribute"
#define NO_LABEL_STACK "Missing MPLS stack"
+#define MISMATCHED_AF " - mismatched address family (%I for %s)"
static void
-bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
+bgp_apply_next_hop(struct bgp_parse_state *s, ea_list **to, ip_addr gw, ip_addr ll)
{
struct bgp_proto *p = s->proto;
struct bgp_channel *c = s->channel;
@@ -949,67 +1012,88 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
neighbor *nbr = NULL;
/* GW_DIRECT -> single_hop -> p->neigh != NULL */
- if (ipa_nonzero(gw))
+ if (ipa_nonzero2(gw))
nbr = neigh_find(&p->p, gw, NULL, 0);
else if (ipa_nonzero(ll))
nbr = neigh_find(&p->p, ll, p->neigh->iface, 0);
+ else
+ WITHDRAW(BAD_NEXT_HOP " - zero address");
- if (!nbr || (nbr->scope == SCOPE_HOST))
- WITHDRAW(BAD_NEXT_HOP);
+ if (!nbr)
+ WITHDRAW(BAD_NEXT_HOP " - address %I not directly reachable", ipa_nonzero(gw) ? gw : ll);
- a->dest = RTD_UNICAST;
- a->nh.gw = nbr->addr;
- a->nh.iface = nbr->iface;
- a->igp_metric = c->cf->cost;
+ if (nbr->scope == SCOPE_HOST)
+ WITHDRAW(BAD_NEXT_HOP " - address %I is local", nbr->addr);
+
+ ea_set_attr_u32(to, &ea_gen_igp_metric, 0, c->cf->cost);
+
+ struct nexthop_adata nhad = {
+ .nh = {
+ .gw = nbr->addr,
+ .iface = nbr->iface,
+ },
+ .ad = {
+ .length = sizeof nhad - sizeof nhad.ad,
+ },
+ };
+ ea_set_attr_data(to, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length);
}
else /* GW_RECURSIVE */
{
- if (ipa_zero(gw))
- WITHDRAW(BAD_NEXT_HOP);
+ if (ipa_zero2(gw))
+ WITHDRAW(BAD_NEXT_HOP " - zero address");
rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6;
- s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table);
-
- if (!s->mpls)
- rta_apply_hostentry(a, s->hostentry, NULL, s->pool);
+ ip_addr lla = (c->cf->next_hop_prefer == NHP_LOCAL) ? ll : IPA_NONE;
- /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
+ if (s->mpls)
+ {
+ u32 labels[BGP_MPLS_MAX];
+ ea_set_hostentry(to, c->c.table, tab, gw, lla, BGP_MPLS_MAX, labels);
+ }
+ else
+ ea_set_hostentry(to, c->c.table, tab, gw, lla, 0, NULL);
}
}
static void
-bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum)
+bgp_apply_mpls_labels(struct bgp_parse_state *s, ea_list **to, u32 lnum, u32 labels[lnum])
{
if (lnum > MPLS_MAX_LABEL_STACK)
{
REPORT("Too many MPLS labels ($u)", lnum);
- a->dest = RTD_UNREACHABLE;
- a->hostentry = NULL;
- a->nh = (struct nexthop) { };
+ ea_set_dest(to, 0, RTD_UNREACHABLE);
return;
}
/* Handle implicit NULL as empty MPLS stack */
if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL))
- lnum = 0;
+ lnum = s->mpls_labels->length = 0;
if (s->channel->cf->gw_mode == GW_DIRECT)
{
- a->nh.labels = lnum;
- memcpy(a->nh.label, labels, 4*lnum);
+ eattr *e = ea_find(*to, &ea_gen_nexthop);
+ struct {
+ struct nexthop_adata nhad;
+ u32 labels[MPLS_MAX_LABEL_STACK];
+ } nh;
+
+ memcpy(&nh.nhad, e->u.ptr, sizeof(struct adata) + e->u.ptr->length);
+ nh.nhad.nh.labels = lnum;
+ memcpy(nh.labels, labels, lnum * sizeof(u32));
+ nh.nhad.ad.length = sizeof nh.nhad + lnum * sizeof(u32);
}
else /* GW_RECURSIVE */
{
- mpls_label_stack ms;
-
- ms.len = lnum;
- memcpy(ms.stack, labels, 4*lnum);
- rta_apply_hostentry(a, s->hostentry, &ms, s->pool);
+ eattr *e = ea_find(*to, &ea_gen_hostentry);
+ ASSERT_DIE(e);
+ struct hostentry_adata *head = (void *) e->u.ptr;
+ memcpy(&head->labels, labels, lnum * sizeof(u32));
+ head->ad.length = (void *)(&head->labels[lnum]) - (void *) head->ad.data;
}
}
-
static int
bgp_match_src(struct bgp_export_state *s, int mode)
{
@@ -1039,13 +1123,17 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
return 1;
/* Keep it when explicitly set in export filter */
- if (a->type & EAF_FRESH)
+ if (a->fresh)
return 1;
/* Check for non-matching AF */
if ((ipa_is_ip4(*nh) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
return 0;
+ /* Do not pass NEXT_HOP between different VRFs */
+ if (p->p.vrf && s->src && s->src->p.vrf && (p->p.vrf != s->src->p.vrf))
+ return 0;
+
/* Keep it when exported to internal peers */
if (p->is_interior && ipa_nonzero(*nh))
return 1;
@@ -1056,31 +1144,45 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
return p->neigh && (p->neigh->iface == ifa);
}
-static inline int
+static inline struct nexthop *
bgp_use_gateway(struct bgp_export_state *s)
{
struct bgp_proto *p = s->proto;
struct bgp_channel *c = s->channel;
- rta *ra = s->route->attrs;
+ ea_list *ra = s->route->attrs;
/* Handle next hop self option - also applies to gateway */
if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self))
- return 0;
+ return NULL;
+
+ eattr *nhea = ea_find(ra, &ea_gen_nexthop);
+ if (!nhea)
+ return NULL;
/* We need one valid global gateway */
- if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw))
- return 0;
+ struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr;
+ if (!NEXTHOP_IS_REACHABLE(nhad) ||
+ !NEXTHOP_ONE(nhad) || ipa_zero(nhad->nh.gw) ||
+ ipa_is_link_local(nhad->nh.gw))
+ return NULL;
/* Check for non-matching AF */
- if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
+ if ((ipa_is_ip4(nhad->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
+ return NULL;
+
+ /* Do not use gateway from different VRF */
+ if (p->p.vrf && nhad->nh.iface && (p->p.vrf != nhad->nh.iface->master))
return 0;
/* Use it when exported to internal peers */
if (p->is_interior)
- return 1;
+ return &nhad->nh;
/* Use it when forwarded to single-hop BGP peer on on the same iface */
- return p->neigh && (p->neigh->iface == ra->nh.iface);
+ if (p->neigh && (p->neigh->iface == nhad->nh.iface))
+ return &nhad->nh;
+
+ return NULL;
}
static void
@@ -1088,60 +1190,64 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
{
if (!a || !bgp_use_next_hop(s, a))
{
- if (bgp_use_gateway(s))
+ struct nexthop *nhloc;
+ if (nhloc = bgp_use_gateway(s))
{
- rta *ra = s->route->attrs;
- ip_addr nh[1] = { ra->nh.gw };
- bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
+ ip_addr nh[1] = { nhloc->gw };
+ bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, 16);
if (s->mpls)
{
u32 implicit_null = BGP_MPLS_NULL;
- u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null;
- uint lnum = ra->nh.labels ? ra->nh.labels : 1;
- bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
+ u32 *labels = nhloc->labels ? nhloc->label : &implicit_null;
+ uint lnum = nhloc->labels ? nhloc->labels : 1;
+ bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
}
+ else
+ bgp_unset_attr(to, BA_MPLS_LABEL_STACK);
}
else
{
ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
- bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
+ bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
s->local_next_hop = 1;
/* TODO: Use local MPLS assigned label */
if (s->mpls)
{
u32 implicit_null = BGP_MPLS_NULL;
- bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4);
+ bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4);
}
+ else
+ bgp_unset_attr(to, BA_MPLS_LABEL_STACK);
}
}
/* Check if next hop is valid */
a = bgp_find_attr(*to, BA_NEXT_HOP);
if (!a)
- WITHDRAW(NO_NEXT_HOP);
+ REJECT(NO_NEXT_HOP);
ip_addr *nh = (void *) a->u.ptr->data;
ip_addr peer = s->proto->remote_ip;
uint len = a->u.ptr->length;
/* Forbid zero next hop */
- if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
- WITHDRAW(BAD_NEXT_HOP);
+ if (ipa_zero2(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
+ REJECT(BAD_NEXT_HOP " - zero address");
/* Forbid next hop equal to neighbor IP */
if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
- WITHDRAW(BAD_NEXT_HOP);
+ REJECT(BAD_NEXT_HOP " - neighbor address %I", peer);
/* Forbid next hop with non-matching AF */
if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) &&
!s->channel->ext_next_hop)
- WITHDRAW(BAD_NEXT_HOP);
+ REJECT(BAD_NEXT_HOP MISMATCHED_AF, nh[0], s->channel->desc->name);
/* Just check if MPLS stack */
if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
- WITHDRAW(NO_LABEL_STACK);
+ REJECT(NO_LABEL_STACK);
}
static uint
@@ -1175,7 +1281,7 @@ bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size
}
static void
-bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
+bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, ea_list **to)
{
struct bgp_channel *c = s->channel;
struct adata *ad = lp_alloc_adata(s->pool, 32);
@@ -1212,12 +1318,12 @@ bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
ad->length = 16;
if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
- WITHDRAW(BAD_NEXT_HOP);
+ WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name);
// XXXX validate next hop
- bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
- bgp_apply_next_hop(s, a, nh[0], nh[1]);
+ bgp_set_attr_ptr(to, BA_NEXT_HOP, 0, ad);
+ bgp_apply_next_hop(s, to, nh[0], nh[1]);
}
static uint
@@ -1255,7 +1361,7 @@ bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint siz
}
static void
-bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
+bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, ea_list **to)
{
struct bgp_channel *c = s->channel;
struct adata *ad = lp_alloc_adata(s->pool, 32);
@@ -1293,12 +1399,12 @@ bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
bgp_parse_error(s, 9);
if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
- WITHDRAW(BAD_NEXT_HOP);
+ WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name);
// XXXX validate next hop
- bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
- bgp_apply_next_hop(s, a, nh[0], nh[1]);
+ bgp_set_attr_ptr(to, BA_NEXT_HOP, 0, ad);
+ bgp_apply_next_hop(s, to, nh[0], nh[1]);
}
@@ -1310,7 +1416,7 @@ bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte
}
static void
-bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
+bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
{
/*
* Although we expect no next hop and RFC 7606 7.11 states that attribute
@@ -1322,11 +1428,11 @@ bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, ui
}
static void
-bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
+bgp_update_next_hop_none(struct bgp_export_state *s UNUSED, eattr *a, ea_list **to)
{
/* NEXT_HOP shall not pass */
if (a)
- bgp_unset_attr(to, s->pool, BA_NEXT_HOP);
+ bgp_unset_attr(to, BA_NEXT_HOP);
}
@@ -1335,7 +1441,7 @@ bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
*/
static void
-bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
+bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, ea_list *a0)
{
if (path_id != s->last_id)
{
@@ -1344,28 +1450,27 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
s->last_src = rt_get_source(&s->proto->p, path_id);
s->last_id = path_id;
- rta_free(s->cached_rta);
- s->cached_rta = NULL;
+ ea_free(s->cached_ea);
+ s->cached_ea = NULL;
}
if (!a0)
{
+ /* Route update was changed to withdraw */
+ if (s->err_withdraw && s->reach_nlri_step)
+ REPORT("Invalid route %N withdrawn", n);
+
/* Route withdraw */
rte_update(&s->channel->c, n, NULL, s->last_src);
return;
}
/* Prepare cached route attributes */
- if (s->cached_rta == NULL)
- {
- /* Workaround for rta_lookup() breaking eattrs */
- ea_list *ea = a0->eattrs;
- s->cached_rta = rta_lookup(a0);
- a0->eattrs = ea;
- }
+ if (s->cached_ea == NULL)
+ s->cached_ea = ea_lookup(a0, 0);
rte e0 = {
- .attrs = s->cached_rta,
+ .attrs = s->cached_ea,
.src = s->last_src,
};
@@ -1392,9 +1497,10 @@ bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, const adata *mpls, byte
}
static void
-bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a)
+bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, ea_list **to)
{
- u32 labels[BGP_MPLS_MAX], label;
+ u32 labels[BGP_MPLS_MAX];
+ u32 label;
uint lnum = 0;
do {
@@ -1408,31 +1514,20 @@ bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *p
/* RFC 8277 2.4 - withdraw does not have variable-size MPLS stack but
fixed-size 24-bit Compatibility field, which MUST be ignored */
- if (!a && !s->err_withdraw)
+ if (!s->reach_nlri_step)
return;
}
while (!(label & BGP_MPLS_BOS));
- if (!a)
+ if (!*to)
return;
- /* Attach MPLS attribute unless we already have one */
- if (!s->mpls_labels)
- {
- s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX);
- bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels);
- }
-
- /* Overwrite data in the attribute */
- s->mpls_labels->length = 4*lnum;
- memcpy(s->mpls_labels->data, labels, 4*lnum);
-
/* Update next hop entry in rta */
- bgp_apply_mpls_labels(s, a, labels, lnum);
+ bgp_apply_mpls_labels(s, to, lnum, labels);
/* Attributes were changed, invalidate cached entry */
- rta_free(s->cached_rta);
- s->cached_rta = NULL;
+ rta_free(s->cached_ea);
+ s->cached_ea = NULL;
return;
}
@@ -1468,14 +1563,14 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1501,7 +1596,7 @@ bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
if (l > IP4_MAX_PREFIX_LENGTH)
bgp_parse_error(s, 10);
@@ -1553,14 +1648,14 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1586,7 +1681,7 @@ bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
if (l > IP6_MAX_PREFIX_LENGTH)
bgp_parse_error(s, 10);
@@ -1641,14 +1736,14 @@ bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1674,7 +1769,7 @@ bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
/* Decode route distinguisher */
if (l < 64)
@@ -1738,14 +1833,14 @@ bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1771,7 +1866,7 @@ bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
/* Decode route distinguisher */
if (l < 64)
@@ -1825,14 +1920,14 @@ bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *
memcpy(pos, net->data, flen);
ADVANCE(pos, size, flen);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1913,14 +2008,14 @@ bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *
memcpy(pos, net->data, flen);
ADVANCE(pos, size, flen);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -2147,6 +2242,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
* var IPv4 Network Layer Reachability Information
*/
+ ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
+
int lr, la;
la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
@@ -2168,6 +2265,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
static byte *
bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
+ ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
+
/*
* 2 B IPv4 Withdrawn Routes Length (zero)
* --- IPv4 Withdrawn Routes NLRI (unused)
@@ -2291,11 +2390,14 @@ bgp_create_update(struct bgp_channel *c, byte *buf)
again: ;
+ struct lp_state tmpp;
+ lp_save(tmp_linpool, &tmpp);
+
/* Initialize write state */
struct bgp_write_state s = {
.proto = p,
.channel = c,
- .pool = c->c.rte_update_pool,
+ .pool = tmp_linpool,
.mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop,
.as4_session = p->as4_session,
.add_path = c->add_path_tx,
@@ -2303,7 +2405,7 @@ again: ;
};
/* Try unreachable bucket */
- if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
+ if ((buck = c->ptx->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
{
res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
bgp_create_ip_unreach(&s, buck, buf, end):
@@ -2313,14 +2415,14 @@ again: ;
}
/* Try reachable buckets */
- if (!EMPTY_LIST(c->bucket_queue))
+ if (!EMPTY_LIST(c->ptx->bucket_queue))
{
- buck = HEAD(c->bucket_queue);
+ buck = HEAD(c->ptx->bucket_queue);
/* Cleanup empty buckets */
- if (EMPTY_LIST(buck->prefixes))
+ if (bgp_done_bucket(c, buck))
{
- bgp_free_bucket(c, buck);
+ lp_restore(tmp_linpool, &tmpp);
goto again;
}
@@ -2328,13 +2430,13 @@ again: ;
bgp_create_ip_reach(&s, buck, buf, end):
bgp_create_mp_reach(&s, buck, buf, end);
- if (EMPTY_LIST(buck->prefixes))
- bgp_free_bucket(c, buck);
- else
- bgp_defer_bucket(c, buck);
+ bgp_done_bucket(c, buck);
if (!res)
+ {
+ lp_restore(tmp_linpool, &tmpp);
goto again;
+ }
goto done;
}
@@ -2345,7 +2447,7 @@ again: ;
done:
BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
p->stats.tx_updates++;
- lp_flush(s.pool);
+ lp_restore(tmp_linpool, &tmpp);
return res;
}
@@ -2412,7 +2514,6 @@ static inline void
bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len)
{
struct bgp_channel *c = bgp_get_channel(s->proto, afi);
- rta *a = NULL;
if (!c)
DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
@@ -2434,26 +2535,22 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis
if (ea)
{
- a = allocz(RTA_MAX_SIZE);
-
- a->source = RTS_BGP;
- a->scope = SCOPE_UNIVERSE;
- a->from = s->proto->remote_ip;
- a->eattrs = ea;
- a->pref = c->c.preference;
+ ea_set_attr_data(&ea, &ea_gen_from, 0, &s->proto->remote_ip, sizeof(ip_addr));
+ ea_set_attr_u32(&ea, &ea_gen_preference, 0, c->c.preference);
+ ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_BGP);
- c->desc->decode_next_hop(s, nh, nh_len, a);
- bgp_finish_attrs(s, a);
+ c->desc->decode_next_hop(s, nh, nh_len, &ea);
+ bgp_finish_attrs(s, &ea);
/* Handle withdraw during next hop decoding */
if (s->err_withdraw)
- a = NULL;
+ ea = NULL;
}
- c->desc->decode_nlri(s, nlri, len, a);
+ c->desc->decode_nlri(s, nlri, len, ea);
- rta_free(s->cached_rta);
- s->cached_rta = NULL;
+ rta_free(s->cached_ea);
+ s->cached_ea = NULL;
rt_unlock_source(s->last_src);
}
@@ -2477,10 +2574,13 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
bgp_start_timer(p, conn->hold_timer, conn->hold_time);
+ struct lp_state tmpp;
+ lp_save(tmp_linpool, &tmpp);
+
/* Initialize parse state */
struct bgp_parse_state s = {
.proto = p,
- .pool = p->rx_lp,
+ .pool = tmp_linpool,
.as4_session = p->as4_session,
};
@@ -2546,6 +2646,8 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
if (s.mp_unreach_len)
bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0);
+ s.reach_nlri_step = 1;
+
if (s.ip_reach_len)
bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len,
ea, s.ip_next_hop_data, s.ip_next_hop_len);
@@ -2555,8 +2657,8 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
ea, s.mp_next_hop_data, s.mp_next_hop_len);
done:
- rta_free(s.cached_rta);
- lp_flush(s.pool);
+ rta_free(s.cached_ea);
+ lp_restore(tmp_linpool, &tmpp);
return;
}
@@ -2879,7 +2981,11 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
{
ASSERT(conn->sk);
- DBG("BGP: Scheduling packet type %d\n", type);
+ struct bgp_proto *p = conn->bgp;
+ if (c)
+ BGP_TRACE(D_PACKETS, "Scheduling packet type %d for channel %s", type, c->c.name);
+ else
+ BGP_TRACE(D_PACKETS, "Scheduling packet type %d", type);
if (c)
{
@@ -2896,7 +3002,7 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
conn->packets_to_send |= 1 << type;
if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
- ev_send_loop(conn->bgp->p.loop, conn->tx_ev);
+ proto_send_event(&p->p, conn->tx_ev);
}
void
bgp_kick_tx(void *vconn)
@@ -2909,7 +3015,7 @@ bgp_kick_tx(void *vconn)
;
if (!max && !ev_active(conn->tx_ev))
- ev_send_loop(conn->bgp->p.loop, conn->tx_ev);
+ proto_send_event(&conn->bgp->p, conn->tx_ev);
}
void
@@ -2917,13 +3023,14 @@ bgp_tx(sock *sk)
{
struct bgp_conn *conn = sk->data;
+ ASSERT_DIE(birdloop_inside(conn->bgp->p.loop));
DBG("BGP: TX hook\n");
uint max = 1024;
while (--max && (bgp_fire_tx(conn) > 0))
;
if (!max && !ev_active(conn->tx_ev))
- ev_send_loop(conn->bgp->p.loop, conn->tx_ev);
+ proto_send_event(&conn->bgp->p, conn->tx_ev);
}
@@ -2944,6 +3051,7 @@ static struct {
{ 2, 6, "Unacceptable hold time" },
{ 2, 7, "Required capability missing" }, /* [RFC5492] */
{ 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
+ { 2,11, "Role mismatch" }, /* From Open Policy, RFC 9234 */
{ 3, 0, "Invalid UPDATE message" },
{ 3, 1, "Malformed attribute list" },
{ 3, 2, "Unrecognized well-known attribute" },
@@ -3038,13 +3146,19 @@ bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode,
if (len)
{
- /* Bad peer AS - we would like to print the AS */
- if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
+ /* Bad peer AS / unacceptable hold time - print the value as decimal number */
+ if ((code == 2) && ((subcode == 2) || (subcode == 6)) && ((len == 2) || (len == 4)))
{
t += bsprintf(t, ": %u", (len == 2) ? get_u16(data) : get_u32(data));
goto done;
}
+ if ((code == 2) && (subcode == 11) && (len == 1))
+ {
+ t += bsprintf(t, " (%s)", bgp_format_role_name(get_u8(data)));
+ goto done;
+ }
+
/* RFC 8203 - shutdown communication */
if (((code == 6) && ((subcode == 2) || (subcode == 4))))
if (bgp_handle_message(p, data, len, &t))
@@ -3147,6 +3261,30 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
}
}
+void
+bgp_uncork(void *vp)
+{
+ /* The uncork event is run from &main_birdloop and there is no useful way how
+ * to assign the target loop to it, thus we have to lock it ourselves. */
+
+ struct bgp_proto *p = vp;
+ if (!p)
+ return;
+
+ birdloop_enter(p->p.loop);
+
+ if (p && p->conn && (p->conn->state == BS_ESTABLISHED) && !p->conn->sk->rx_hook)
+ {
+ struct birdsock *sk = p->conn->sk;
+ ASSERT_DIE(sk->rpos > sk->rbuf);
+ sk_resume_rx(p->p.loop, sk, bgp_rx);
+ bgp_rx(sk, sk->rpos - sk->rbuf);
+ BGP_TRACE(D_PACKETS, "Uncorked");
+ }
+
+ birdloop_leave(p->p.loop);
+}
+
/**
* bgp_rx - handle received data
* @sk: socket
@@ -3161,6 +3299,7 @@ int
bgp_rx(sock *sk, uint size)
{
struct bgp_conn *conn = sk->data;
+ struct bgp_proto *p = conn->bgp;
byte *pkt_start = sk->rbuf;
byte *end = pkt_start + size;
uint i, len;
@@ -3170,6 +3309,12 @@ bgp_rx(sock *sk, uint size)
{
if ((conn->state == BS_CLOSE) || (conn->sk != sk))
return 0;
+ if ((conn->state == BS_ESTABLISHED) && rt_cork_check(conn->bgp->uncork_ev))
+ {
+ sk_pause_rx(p->p.loop, sk);
+ BGP_TRACE(D_PACKETS, "Corked");
+ return 0;
+ }
for(i=0; i<16; i++)
if (pkt_start[i] != 0xff)
{