summaryrefslogtreecommitdiff
path: root/proto/bgp
diff options
context:
space:
mode:
Diffstat (limited to 'proto/bgp')
-rw-r--r--proto/bgp/attrs.c155
-rw-r--r--proto/bgp/bgp.c154
-rw-r--r--proto/bgp/bgp.h43
-rw-r--r--proto/bgp/config.Y6
-rw-r--r--proto/bgp/packets.c299
5 files changed, 542 insertions, 115 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 1b124a17..f2a8e8b5 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -41,24 +41,6 @@
* specifies that such updates should be ignored, but that is generally
* a bad idea.
*
- * Error checking of optional transitive attributes is done according to
- * draft-ietf-idr-optional-transitive-03, but errors are handled always
- * as withdraws.
- *
- * Unexpected AS_CONFED_* segments in AS_PATH are logged and removed,
- * but unknown segments cause a session drop with Malformed AS_PATH
- * error (see validate_path()). The behavior in such case is not
- * explicitly specified by RFC 4271. RFC 5065 specifies that
- * inconsistent AS_CONFED_* segments should cause a session drop, but
- * implementations that pass invalid AS_CONFED_* segments are
- * widespread.
- *
- * Error handling of AS4_* attributes is done as specified by
- * draft-ietf-idr-rfc4893bis-03. There are several possible
- * inconsistencies between AGGREGATOR and AS4_AGGREGATOR that are not
- * handled by that draft, these are logged and ignored (see
- * bgp_reconstruct_4b_attrs()).
- *
* BGP attribute table has several hooks:
*
* export - Hook that validates and normalizes attribute during export phase.
@@ -281,11 +263,19 @@ bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
static void
bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
+ struct bgp_proto *p = s->proto;
+ int as_length = s->as4_session ? 4 : 2;
+ int as_confed = p->cf->confederation && p->is_interior;
char err[128];
- if (!as_path_valid(data, len, (s->as4_session ? 4 : 2), err, sizeof(err)))
+ if (!as_path_valid(data, len, as_length, as_confed, err, sizeof(err)))
WITHDRAW("Malformed AS_PATH attribute - %s", err);
+ /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
+ if (p->is_interior && !p->is_internal &&
+ ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE)))
+ WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
+
if (!s->as4_session)
{
/* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
@@ -603,11 +593,20 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt
if (len < 6)
DISCARD(BAD_LENGTH, "AS4_PATH", len);
- if (!as_path_valid(data, len, 4, err, sizeof(err)))
+ if (!as_path_valid(data, len, 4, 1, err, sizeof(err)))
DISCARD("Malformed AS4_PATH attribute - %s", err);
- /* XXXX remove CONFED segments */
- bgp_set_attr_data(to, s->pool, BA_AS4_PATH, flags, data, len);
+ struct adata *a = lp_alloc_adata(s->pool, len);
+ memcpy(a->data, data, len);
+
+ /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
+ if (as_path_contains_confed(a))
+ {
+ REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
+ a = as_path_strip_confed(s->pool, a);
+ }
+
+ bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
}
static void
@@ -1042,7 +1041,7 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
if (bgp_as_path_loopy(p, attrs, p->local_as))
goto withdraw;
- /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4 */
+ /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
goto withdraw;
@@ -1221,8 +1220,17 @@ bgp_init_prefix_table(struct bgp_channel *c)
{
HASH_INIT(c->prefix_hash, c->pool, 8);
- c->prefix_slab = sl_new(c->pool, sizeof(struct bgp_prefix) +
- net_addr_length[c->c.net_type]);
+ uint alen = net_addr_length[c->c.net_type];
+ c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
+}
+
+void
+bgp_free_prefix_table(struct bgp_channel *c)
+{
+ HASH_FREE(c->prefix_hash);
+
+ rfree(c->prefix_slab);
+ c->prefix_slab = NULL;
}
static struct bgp_prefix *
@@ -1237,7 +1245,11 @@ bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
return px;
}
- px = sl_alloc(c->prefix_slab);
+ if (c->prefix_slab)
+ px = sl_alloc(c->prefix_slab);
+ else
+ px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
+
px->buck_node.next = NULL;
px->buck_node.prev = NULL;
px->hash = hash;
@@ -1254,7 +1266,11 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
{
rem_node(&px->buck_node);
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
- sl_free(c->prefix_slab, px);
+
+ if (c->prefix_slab)
+ sl_free(c->prefix_slab, px);
+ else
+ mb_free(px);
}
@@ -1278,6 +1294,8 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li
if (src == NULL)
return 0;
+ // XXXX: Check next hop AF
+
/* IBGP route reflection, RFC 4456 */
if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
{
@@ -1314,82 +1332,86 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li
return 0;
}
-static const adata null_adata; /* adata of length 0 */
-static inline void
-bgp_path_prepend(ea_list **attrs, struct linpool *pool, int seg, u32 as, int strip)
-{
- eattr *a = bgp_find_attr(*attrs, BA_AS_PATH);
- adata *d = as_path_prepend2(pool, a ? a->u.ptr : &null_adata, seg, as, strip);
- bgp_set_attr_ptr(attrs, pool, BA_AS_PATH, 0, d);
-}
-
-static inline void
-bgp_cluster_list_prepend(ea_list **attrs, struct linpool *pool, u32 id)
-{
- eattr *a = bgp_find_attr(*attrs, BA_CLUSTER_LIST);
- adata *d = int_set_add(pool, a ? a->u.ptr : NULL, id);
- bgp_set_attr_ptr(attrs, pool, BA_CLUSTER_LIST, 0, d);
-}
+static adata null_adata; /* adata of length 0 */
static ea_list *
-bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs, struct linpool *pool)
+bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
{
struct proto *SRC = e->attrs->src->proto;
struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
struct bgp_export_state s = { .proto = p, .channel =c, .pool = pool, .src = src, .route = e };
+ ea_list *attrs = attrs0;
eattr *a;
+ adata *ad;
/* ORIGIN attribute - mandatory, attach if missing */
- if (! bgp_find_attr(attrs, BA_ORIGIN))
+ if (! bgp_find_attr(attrs0, BA_ORIGIN))
bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
+ /* AS_PATH attribute - mandatory */
+ a = bgp_find_attr(attrs0, BA_AS_PATH);
+ ad = a ? a->u.ptr : &null_adata;
+
+ /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
+ if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
+ ad = as_path_strip_confed(pool, ad);
+
/* AS_PATH attribute - keep or prepend ASN */
if (p->is_internal ||
(p->rs_client && src && src->rs_client))
{
/* IBGP or route server -> just ensure there is one */
- if (! bgp_find_attr(attrs, BA_AS_PATH))
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, lp_alloc_adata(pool, 0));
+ if (!a)
+ bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
}
else if (p->is_interior)
{
- /* Confederation -> prepend ASN as CONFED_SEQUENCE, keep CONFED_* segments */
- bgp_path_prepend(&attrs, pool, AS_PATH_CONFED_SEQUENCE, p->public_as, 0);
+ /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
+ ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
+ bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
}
else /* Regular EBGP (no RS, no confederation) */
{
- /* Regular EBGP -> prepend ASN as regular segment, strip CONFED_* segments */
- bgp_path_prepend(&attrs, pool, AS_PATH_SEQUENCE, p->public_as, 1);
+ /* Regular EBGP -> prepend ASN as regular sequence */
+ ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
+ bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
/* MULTI_EXIT_DESC attribute - accept only if set in export filter */
- a = bgp_find_attr(attrs, BA_MULTI_EXIT_DISC);
+ a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
if (a && !(a->type & EAF_FRESH))
bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
}
/* NEXT_HOP attribute - delegated to AF-specific hook */
- a = bgp_find_attr(attrs, BA_NEXT_HOP);
+ a = bgp_find_attr(attrs0, BA_NEXT_HOP);
bgp_update_next_hop(&s, a, &attrs);
/* LOCAL_PREF attribute - required for IBGP, attach if missing */
- if (p->is_interior && ! bgp_find_attr(attrs, BA_LOCAL_PREF))
+ if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
/* IBGP route reflection, RFC 4456 */
if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
{
/* ORIGINATOR_ID attribute - attach if not already set */
- if (! bgp_find_attr(attrs, BA_ORIGINATOR_ID))
+ if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
/* CLUSTER_LIST attribute - prepend cluster ID */
+ a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
+ ad = a ? a->u.ptr : NULL;
+
+ /* Prepend src cluster ID */
if (src->rr_cluster_id)
- bgp_cluster_list_prepend(&attrs, pool, src->rr_cluster_id);
+ ad = int_set_prepend(pool, ad, src->rr_cluster_id);
- /* Handle different src and dst cluster ID - prepend both ones */
+ /* Prepend dst cluster ID if src and dst clusters are different */
if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
- bgp_cluster_list_prepend(&attrs, pool, p->rr_cluster_id);
+ ad = int_set_prepend(pool, ad, p->rr_cluster_id);
+
+ /* Should be at least one prepended cluster ID */
+ bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
}
/* AS4_* transition attributes, RFC 6793 4.2.2 */
@@ -1410,6 +1432,12 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
}
}
+ /*
+ * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
+ * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
+ * should be checked in AF-specific hooks.
+ */
+
/* Apply per-attribute export hooks for validatation and normalization */
return bgp_export_attrs(&s, attrs);
}
@@ -1452,10 +1480,12 @@ bgp_get_neighbor(rte *r)
eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
u32 as;
- if (e && as_path_get_first(e->u.ptr, &as))
+ if (e && as_path_get_first_regular(e->u.ptr, &as))
return as;
- else
- return ((struct bgp_proto *) r->attrs->src->proto)->remote_as;
+
+ /* If AS_PATH is not defined, we treat rte as locally originated */
+ struct bgp_proto *p = (void *) r->attrs->src->proto;
+ return p->cf->confederation ?: p->local_as;
}
static inline int
@@ -1653,7 +1683,7 @@ bgp_rte_mergable(rte *pri, rte *sec)
}
/* RFC 4271 9.1.2.2. d) Prefer external peers */
- if (pri_bgp->is_internal != sec_bgp->is_internal)
+ if (pri_bgp->is_interior != sec_bgp->is_interior)
return 0;
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
@@ -1843,6 +1873,7 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
/* Handle AS_PATH attribute */
if (p2 && p4)
{
+ /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
int p2_len = as_path_getlen(p2->u.ptr);
int p4_len = as_path_getlen(p4->u.ptr);
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index 2ca153ab..5e95e6b4 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -521,12 +521,17 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
if (peer->gr_aware)
c->load_state = BFS_LOADING;
+ c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop);
c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
- // XXXX reset back to non-ANY?
+ /* Update RA mode */
if (c->add_path_tx)
c->c.ra_mode = RA_ANY;
+ else if (c->cf->secondary)
+ c->c.ra_mode = RA_ACCEPTED;
+ else
+ c->c.ra_mode = RA_OPTIMAL;
}
p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
@@ -554,6 +559,10 @@ bgp_conn_leave_established_state(struct bgp_proto *p)
BGP_TRACE(D_EVENTS, "BGP session closed");
p->conn = NULL;
+ // XXXX free these tables to avoid memory leak during graceful restart
+ // bgp_free_prefix_table(p);
+ // bgp_free_bucket_table(p);
+
if (p->p.proto_state == PS_UP)
bgp_stop(p, 0);
}
@@ -1411,8 +1420,6 @@ bgp_channel_init(struct channel *C, struct channel_config *CF)
struct bgp_channel *c = (void *) C;
struct bgp_channel_config *cf = (void *) CF;
- C->ra_mode = cf->secondary ? RA_ACCEPTED : RA_OPTIMAL;
-
c->cf = cf;
c->afi = cf->afi;
c->desc = bgp_get_af_desc(c->afi);
@@ -1757,10 +1764,131 @@ bgp_get_status(struct proto *P, byte *buf)
}
static void
+bgp_show_afis(int code, char *s, u32 *afis, uint count)
+{
+ buffer b;
+ LOG_BUFFER_INIT(b);
+
+ buffer_puts(&b, s);
+
+ for (u32 *af = afis; af < (afis + count); af++)
+ {
+ const struct bgp_af_desc *desc = bgp_get_af_desc(*af);
+ if (desc)
+ buffer_print(&b, " %s", desc->name);
+ else
+ buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af));
+ }
+
+ if (b.pos == b.end)
+ strcpy(b.end - 32, " ... <too long>");
+
+ cli_msg(code, b.start);
+}
+
+static void
+bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
+{
+ struct bgp_af_caps *ac;
+ uint any_mp_bgp = 0;
+ uint any_gr_able = 0;
+ uint any_add_path = 0;
+ uint any_ext_next_hop = 0;
+ u32 *afl1 = alloca(caps->af_count * sizeof(u32));
+ u32 *afl2 = alloca(caps->af_count * sizeof(u32));
+ uint afn1, afn2;
+
+ WALK_AF_CAPS(caps, ac)
+ {
+ any_mp_bgp |= ac->ready;
+ any_gr_able |= ac->gr_able;
+ any_add_path |= ac->add_path;
+ any_ext_next_hop |= ac->ext_next_hop;
+ }
+
+ if (any_mp_bgp)
+ {
+ cli_msg(-1006, " Multiprotocol");
+
+ afn1 = 0;
+ WALK_AF_CAPS(caps, ac)
+ if (ac->ready)
+ afl1[afn1++] = ac->afi;
+
+ bgp_show_afis(-1006, " AF announced:", afl1, afn1);
+ }
+
+ if (caps->route_refresh)
+ cli_msg(-1006, " Route refresh");
+
+ if (any_ext_next_hop)
+ {
+ cli_msg(-1006, " Extended next hop");
+
+ afn1 = 0;
+ WALK_AF_CAPS(caps, ac)
+ if (ac->ext_next_hop)
+ afl1[afn1++] = ac->afi;
+
+ bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1);
+ }
+
+ if (caps->ext_messages)
+ cli_msg(-1006, " Extended message");
+
+ if (caps->gr_aware)
+ cli_msg(-1006, " Graceful restart");
+
+ if (any_gr_able)
+ {
+ /* Continues from gr_aware */
+ cli_msg(-1006, " Restart time: %u", caps->gr_time);
+ if (caps->gr_flags & BGP_GRF_RESTART)
+ cli_msg(-1006, " Restart recovery");
+
+ afn1 = afn2 = 0;
+ WALK_AF_CAPS(caps, ac)
+ {
+ if (ac->gr_able)
+ afl1[afn1++] = ac->afi;
+
+ if (ac->gr_af_flags & BGP_GRF_FORWARDING)
+ afl2[afn2++] = ac->afi;
+ }
+
+ bgp_show_afis(-1006, " AF supported:", afl1, afn1);
+ bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
+ }
+
+ if (caps->as4_support)
+ cli_msg(-1006, " 4-octet AS numbers");
+
+ if (any_add_path)
+ {
+ cli_msg(-1006, " ADD-PATH");
+
+ afn1 = afn2 = 0;
+ WALK_AF_CAPS(caps, ac)
+ {
+ if (ac->add_path & BGP_ADD_PATH_RX)
+ afl1[afn1++] = ac->afi;
+
+ if (ac->add_path & BGP_ADD_PATH_TX)
+ afl2[afn2++] = ac->afi;
+ }
+
+ bgp_show_afis(-1006, " RX:", afl1, afn1);
+ bgp_show_afis(-1006, " TX:", afl2, afn2);
+ }
+
+ if (caps->enhanced_refresh)
+ cli_msg(-1006, " Enhanced refresh");
+}
+
+static void
bgp_show_proto_info(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
- struct bgp_conn *c = p->conn;
cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
@@ -1789,15 +1917,11 @@ bgp_show_proto_info(struct proto *P)
else if (P->proto_state == PS_UP)
{
cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
+ cli_msg(-1006, " Local capabilities");
+ bgp_show_capabilities(p, p->conn->local_caps);
+ cli_msg(-1006, " Neighbor capabilities");
+ bgp_show_capabilities(p, p->conn->remote_caps);
/* XXXX
- cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s%s",
- c->peer_refresh_support ? " refresh" : "",
- c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
- c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
- c->peer_as4_support ? " AS4" : "",
- (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
- (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
- c->peer_ext_messages_support ? " ext-messages" : "");
cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s",
p->is_internal ? "internal" : "external",
p->cf->multihop ? " multihop" : "",
@@ -1810,9 +1934,9 @@ bgp_show_proto_info(struct proto *P)
*/
cli_msg(-1006, " Source address: %I", p->source_addr);
cli_msg(-1006, " Hold timer: %d/%d",
- tm_remains(c->hold_timer), c->hold_time);
+ tm_remains(p->conn->hold_timer), p->conn->hold_time);
cli_msg(-1006, " Keepalive timer: %d/%d",
- tm_remains(c->keepalive_timer), c->keepalive_time);
+ tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
}
if ((p->last_error_class != BE_NONE) &&
@@ -1846,7 +1970,7 @@ struct protocol proto_bgp = {
.template = "bgp%d",
.attr_class = EAP_BGP,
.preference = DEF_PREF_BGP,
- .channel_mask = NB_IP,
+ .channel_mask = NB_IP | NB_FLOW4 | NB_FLOW6,
.proto_size = sizeof(struct bgp_proto),
.config_size = sizeof(struct bgp_config),
.postconfig = bgp_postconfig,
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 4ecb86a0..5d2539d5 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -31,6 +31,7 @@ struct eattr;
#define BGP_SAFI_UNICAST 1
#define BGP_SAFI_MULTICAST 2
+#define BGP_SAFI_FLOW 133
/* Internal AF codes */
@@ -42,6 +43,8 @@ struct eattr;
#define BGP_AF_IPV6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_UNICAST )
#define BGP_AF_IPV4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MULTICAST )
#define BGP_AF_IPV6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MULTICAST )
+#define BGP_AF_FLOW4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_FLOW )
+#define BGP_AF_FLOW6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_FLOW )
struct bgp_write_state;
@@ -70,7 +73,7 @@ struct bgp_config {
u16 local_port; /* Local listening port */
u16 remote_port; /* Neighbor destination port */
int multihop; /* Number of hops if multihop */
- int strict_bind; /* Bind listening socket to local address XXXX */
+ int strict_bind; /* Bind listening socket to local address */
int ttl_security; /* Enable TTL security [RFC 5082] */
int compare_path_lengths; /* Use path lengths when selecting best route */
int med_metric; /* Compare MULTI_EXIT_DISC even between routes from differen ASes */
@@ -120,6 +123,7 @@ struct bgp_channel_config {
u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */
u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
u8 gr_able; /* Allow full graceful restart for the channel */
+ u8 ext_next_hop; /* Allow both IPv4 and IPv6 next hops */
u8 add_path; /* Use ADD-PATH extension [RFC 7911] */
struct rtable_config *igp_table; /* Table used for recursive next hop lookups */
@@ -151,6 +155,7 @@ struct bgp_af_caps {
u8 ready; /* Multiprotocol capability, RFC 4760 */
u8 gr_able; /* Graceful restart support, RFC 4724 */
u8 gr_af_flags; /* Graceful restart per-AF flags */
+ u8 ext_next_hop; /* Extended IPv6 next hop, RFC 5549 */
u8 add_path; /* Multiple paths support, RFC 7911 */
};
@@ -171,6 +176,10 @@ struct bgp_caps {
struct bgp_af_caps af_data[0]; /* Per-AF capability data */
};
+#define WALK_AF_CAPS(caps,ac) \
+ for (ac = caps->af_data; ac < &caps->af_data[caps->af_count]; ac++)
+
+
struct bgp_socket {
node n; /* Node in global bgp_sockets */
sock *sk; /* Real listening socket */
@@ -267,6 +276,8 @@ struct bgp_channel {
u8 gr_ready; /* Neighbor could do GR on this AF */
u8 gr_active; /* Neighbor is doing GR and keeping fwd state */
+ u8 ext_next_hop; /* Session allows both IPv4 and IPv6 next hops */
+
u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */
u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */
@@ -446,18 +457,6 @@ bgp_unset_attr(ea_list **to, struct linpool *pool, uint code)
{ eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; }
-
-
-/* Hack: although BA_NEXT_HOP attribute has type EAF_TYPE_IP_ADDRESS, in IPv6
- * we store two addesses in it - a global address and a link local address.
- */
-#ifdef XXX
-#define NEXT_HOP_LENGTH (2*sizeof(ip_addr))
-static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; ((ip_addr *) b)[1] = IPA_NONE; }
-#define NEXT_HOP_LENGTH sizeof(ip_addr)
-static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; }
-#endif
-
int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end);
ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len);
@@ -510,26 +509,22 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
#define BAF_PARTIAL 0x20
#define BAF_EXT_LEN 0x10
-#define BA_ORIGIN 0x01 /* [RFC1771] */ /* WM */
+#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */
#define BA_AS_PATH 0x02 /* WM */
#define BA_NEXT_HOP 0x03 /* WM */
#define BA_MULTI_EXIT_DISC 0x04 /* ON */
#define BA_LOCAL_PREF 0x05 /* WD */
#define BA_ATOMIC_AGGR 0x06 /* WD */
#define BA_AGGREGATOR 0x07 /* OT */
-#define BA_COMMUNITY 0x08 /* [RFC1997] */ /* OT */
-#define BA_ORIGINATOR_ID 0x09 /* [RFC1966] */ /* ON */
-#define BA_CLUSTER_LIST 0x0a /* ON */
-/* We don't support these: */
-#define BA_DPA 0x0b /* ??? */
-#define BA_ADVERTISER 0x0c /* [RFC1863] */
-#define BA_RCID_PATH 0x0d
-#define BA_MP_REACH_NLRI 0x0e /* [RFC2283] */
-#define BA_MP_UNREACH_NLRI 0x0f
+#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */
+#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */
+#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */
+#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */
+#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */
#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */
#define BA_AS4_PATH 0x11 /* RFC 6793 */
#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */
-#define BA_LARGE_COMMUNITY 0x20 /* [draft-ietf-idr-large-community] */
+#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */
/* BGP connection states */
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 10a338d8..8c63b331 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -28,7 +28,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL,
SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX,
GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY,
- STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST)
+ STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6)
%type <i32> bgp_afi
@@ -139,6 +139,8 @@ bgp_afi:
| IPV6 { $$ = BGP_AF_IPV6; }
| IPV4 MULTICAST { $$ = BGP_AF_IPV4_MC; }
| IPV6 MULTICAST { $$ = BGP_AF_IPV6_MC; }
+ | FLOW4 { $$ = BGP_AF_FLOW4; }
+ | FLOW6 { $$ = BGP_AF_FLOW6; }
;
bgp_channel_start: bgp_afi
@@ -150,6 +152,7 @@ bgp_channel_start: bgp_afi
this_channel = channel_config_new(&channel_bgp, desc->net, this_proto);
BGP_CC->c.name = desc->name;
+ BGP_CC->c.ra_mode = RA_UNDEF;
BGP_CC->afi = $1;
BGP_CC->gr_able = 0xff; /* undefined */
};
@@ -166,6 +169,7 @@ bgp_channel_item:
| GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; }
| SECONDARY bool { BGP_CC->secondary = $2; }
| GRACEFUL RESTART bool { BGP_CC->gr_able = $3; }
+ | EXTENDED NEXT HOP bool { BGP_CC->ext_next_hop = $4; }
| ADD PATHS RX { BGP_CC->add_path = BGP_ADD_PATH_RX; }
| ADD PATHS TX { BGP_CC->add_path = BGP_ADD_PATH_TX; }
| ADD PATHS bool { BGP_CC->add_path = $3 ? BGP_ADD_PATH_FULL : 0; }
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 9380f999..2106e0d1 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -20,6 +20,7 @@
#include "nest/mrtdump.h"
#include "conf/conf.h"
#include "lib/unaligned.h"
+#include "lib/flowspec.h"
#include "lib/socket.h"
#include "nest/cli.h"
@@ -184,9 +185,6 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf)
/* Capability negotiation as per RFC 5492 */
-#define WALK_AF_CAPS(caps,ac) \
- for (ac = caps->af_data; ac < &caps->af_data[caps->af_count]; ac++)
-
const struct bgp_af_caps *
bgp_find_af_caps(struct bgp_caps *caps, u32 afi)
{
@@ -230,6 +228,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
struct bgp_channel *c;
struct bgp_caps *caps;
struct bgp_af_caps *ac;
+ uint any_ext_next_hop = 0;
uint any_add_path = 0;
byte *data;
@@ -261,6 +260,9 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
ac->afi = c->afi;
ac->ready = 1;
+ ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop;
+ any_ext_next_hop |= ac->ext_next_hop;
+
ac->add_path = c->cf->add_path;
any_add_path |= ac->add_path;
@@ -279,6 +281,12 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
/* Create capability list in buffer */
+ /*
+ * Note that max length is ~ 20+14*af_count. With max 6 channels that is
+ * 104. Option limit is 253 and buffer size is 4096, so we cannot overflow
+ * unless we add new capabilities or more AFs.
+ */
+
WALK_AF_CAPS(caps, ac)
if (ac->ready)
{
@@ -294,6 +302,23 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
*buf++ = 0; /* Capability data length */
}
+ if (any_ext_next_hop)
+ {
+ *buf++ = 5; /* Capability 5: Support for extended next hop */
+ *buf++ = 0; /* Capability data length, will be fixed later */
+ data = buf;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->ext_next_hop)
+ {
+ put_af4(buf, ac->afi);
+ put_u16(buf+4, BGP_AFI_IPV6);
+ buf += 6;
+ }
+
+ data[-1] = buf - data;
+ }
+
if (caps->ext_messages)
{
*buf++ = 6; /* Capability 6: Support for extended messages */
@@ -352,8 +377,6 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
*buf++ = 0; /* Capability data length */
}
- /* FIXME: Should not XXXX 255 */
-
return buf;
}
@@ -392,6 +415,23 @@ bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, i
caps->route_refresh = 1;
break;
+ case 5: /* Extended next hop encoding capability, RFC 5549 */
+ if (cl % 6)
+ goto err;
+
+ for (i = 0; i < cl; i += 6)
+ {
+ /* Specified only for IPv4 prefixes with IPv6 next hops */
+ if ((get_u16(pos+2+i+0) != BGP_AFI_IPV4) ||
+ (get_u16(pos+2+i+4) != BGP_AFI_IPV6))
+ continue;
+
+ af = get_af4(pos+2+i);
+ ac = bgp_get_af_caps(caps, af);
+ ac->ext_next_hop = 1;
+ }
+ break;
+
case 6: /* Extended message length capability, RFC draft */
if (cl != 0)
goto err;
@@ -673,9 +713,13 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
#define REPORT(msg, args...) \
({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
+#define DISCARD(msg, args...) \
+ ({ REPORT(msg, ## args); return; })
+
#define WITHDRAW(msg, args...) \
({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
+#define BAD_AFI "Unexpected AF <%u/%u> in UPDATE"
#define BAD_NEXT_HOP "Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP "Missing NEXT_HOP attribute"
@@ -792,6 +836,32 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
WITHDRAW(BAD_NEXT_HOP);
}
+static uint
+bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED)
+{
+ return 0;
+}
+
+static void
+bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
+{
+ /*
+ * Although we expect no next hop and RFC 7606 7.11 states that attribute
+ * MP_REACH_NLRI with unexpected next hop length is considered malformed,
+ * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt.
+ */
+
+ return;
+}
+
+static void
+bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
+{
+ /* NEXT_HOP shall not pass */
+ if (a)
+ bgp_unset_attr(to, s->pool, BA_NEXT_HOP);
+}
+
/*
* UPDATE
@@ -1065,6 +1135,190 @@ bgp_decode_next_hop_ip6(struct bgp_parse_state *s, byte *data, uint len, rta *a)
}
+static uint
+bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
+{
+ byte *pos = buf;
+
+ while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
+ {
+ struct bgp_prefix *px = HEAD(buck->prefixes);
+ struct net_addr_flow4 *net = (void *) px->net;
+ uint flen = net->length - sizeof(net_addr_flow4);
+
+ /* Encode path ID */
+ if (s->add_path)
+ {
+ put_u32(pos, px->path_id);
+ ADVANCE(pos, size, 4);
+ }
+
+ if (flen > size)
+ break;
+
+ /* Copy whole flow data including length */
+ memcpy(pos, net->data, flen);
+ ADVANCE(pos, size, flen);
+
+ bgp_free_prefix(s->channel, px);
+ }
+
+ return pos - buf;
+}
+
+static void
+bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+{
+ while (len)
+ {
+ u32 path_id = 0;
+
+ /* Decode path ID */
+ if (s->add_path)
+ {
+ if (len < 4)
+ bgp_parse_error(s, 1);
+
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
+ }
+
+ if (len < 2)
+ bgp_parse_error(s, 1);
+
+ /* Decode flow length */
+ uint hlen = flow_hdr_length(pos);
+ uint dlen = flow_read_length(pos);
+ uint flen = hlen + dlen;
+ byte *data = pos + hlen;
+
+ if (len < flen)
+ bgp_parse_error(s, 1);
+
+ /* Validate flow data */
+ enum flow_validated_state r = flow4_validate(data, dlen);
+ if (r != FLOW_ST_VALID)
+ {
+ log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
+ bgp_parse_error(s, 1);
+ }
+
+ if (data[0] != FLOW_TYPE_DST_PREFIX)
+ {
+ log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
+ bgp_parse_error(s, 1);
+ }
+
+ /* Decode dst prefix */
+ ip4_addr px = IP4_NONE;
+ uint pxlen = data[1];
+
+ // FIXME: Use some generic function
+ memcpy(&px, data, BYTES(pxlen));
+ px = ip4_and(px, ip4_mkmask(pxlen));
+
+ /* Prepare the flow */
+ net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen);
+ net_fill_flow4(n, px, pxlen, pos, flen);
+ ADVANCE(pos, len, flen);
+
+ bgp_rte_update(s, n, path_id, a);
+ }
+}
+
+
+static uint
+bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
+{
+ byte *pos = buf;
+
+ while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
+ {
+ struct bgp_prefix *px = HEAD(buck->prefixes);
+ struct net_addr_flow6 *net = (void *) px->net;
+ uint flen = net->length - sizeof(net_addr_flow6);
+
+ /* Encode path ID */
+ if (s->add_path)
+ {
+ put_u32(pos, px->path_id);
+ ADVANCE(pos, size, 4);
+ }
+
+ if (flen > size)
+ break;
+
+ /* Copy whole flow data including length */
+ memcpy(pos, net->data, flen);
+ ADVANCE(pos, size, flen);
+
+ bgp_free_prefix(s->channel, px);
+ }
+
+ return pos - buf;
+}
+
+static void
+bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+{
+ while (len)
+ {
+ u32 path_id = 0;
+
+ /* Decode path ID */
+ if (s->add_path)
+ {
+ if (len < 4)
+ bgp_parse_error(s, 1);
+
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
+ }
+
+ if (len < 2)
+ bgp_parse_error(s, 1);
+
+ /* Decode flow length */
+ uint hlen = flow_hdr_length(pos);
+ uint dlen = flow_read_length(pos);
+ uint flen = hlen + dlen;
+ byte *data = pos + hlen;
+
+ if (len < flen)
+ bgp_parse_error(s, 1);
+
+ /* Validate flow data */
+ enum flow_validated_state r = flow6_validate(data, dlen);
+ if (r != FLOW_ST_VALID)
+ {
+ log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
+ bgp_parse_error(s, 1);
+ }
+
+ if (data[0] != FLOW_TYPE_DST_PREFIX)
+ {
+ log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
+ bgp_parse_error(s, 1);
+ }
+
+ /* Decode dst prefix */
+ ip6_addr px = IP6_NONE;
+ uint pxlen = data[1];
+
+ // FIXME: Use some generic function
+ memcpy(&px, data, BYTES(pxlen));
+ px = ip6_and(px, ip6_mkmask(pxlen));
+
+ /* Prepare the flow */
+ net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen);
+ net_fill_flow6(n, px, pxlen, pos, flen);
+ ADVANCE(pos, len, flen);
+
+ bgp_rte_update(s, n, path_id, a);
+ }
+}
+
+
static const struct bgp_af_desc bgp_af_table[] = {
{
.afi = BGP_AF_IPV4,
@@ -1087,6 +1341,16 @@ static const struct bgp_af_desc bgp_af_table[] = {
.update_next_hop = bgp_update_next_hop_ip,
},
{
+ .afi = BGP_AF_FLOW4,
+ .net = NET_FLOW4,
+ .name = "flow4",
+ .encode_nlri = bgp_encode_nlri_flow4,
+ .decode_nlri = bgp_decode_nlri_flow4,
+ .encode_next_hop = bgp_encode_next_hop_none,
+ .decode_next_hop = bgp_decode_next_hop_none,
+ .update_next_hop = bgp_update_next_hop_none,
+ },
+ {
.afi = BGP_AF_IPV6,
.net = NET_IP6,
.name = "ipv6",
@@ -1106,6 +1370,16 @@ static const struct bgp_af_desc bgp_af_table[] = {
.decode_next_hop = bgp_decode_next_hop_ip6,
.update_next_hop = bgp_update_next_hop_ip,
},
+ {
+ .afi = BGP_AF_FLOW6,
+ .net = NET_FLOW6,
+ .name = "flow6",
+ .encode_nlri = bgp_encode_nlri_flow6,
+ .decode_nlri = bgp_decode_nlri_flow6,
+ .encode_next_hop = bgp_encode_next_hop_none,
+ .decode_next_hop = bgp_decode_next_hop_none,
+ .update_next_hop = bgp_update_next_hop_none,
+ },
};
const struct bgp_af_desc *
@@ -1387,15 +1661,15 @@ bgp_create_end_mark(struct bgp_channel *c, byte *buf)
}
static inline void
-bgp_rx_end_mark(struct bgp_proto *p, u32 afi)
+bgp_rx_end_mark(struct bgp_parse_state *s, u32 afi)
{
+ struct bgp_proto *p = s->proto;
struct bgp_channel *c = bgp_get_channel(p, afi);
BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
- /* XXXX handle unknown AF in MP_*_NLRI */
if (!c)
- return;
+ DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
if (c->load_state == BFS_LOADING)
c->load_state = BFS_NONE;
@@ -1413,9 +1687,8 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis
struct bgp_channel *c = bgp_get_channel(s->proto, afi);
rta *a = NULL;
- /* XXXX handle unknown AF in MP_*_NLRI */
if (!c)
- return;
+ DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
s->channel = c;
s->add_path = c->add_path_rx;
@@ -1523,12 +1796,12 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
/* Check for End-of-RIB marker */
if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len)
- { bgp_rx_end_mark(p, BGP_AF_IPV4); goto done; }
+ { bgp_rx_end_mark(&s, BGP_AF_IPV4); goto done; }
/* Check for MP End-of-RIB marker */
if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len &&
- !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af) /* XXXX See RFC 7606 5.2 */
- { bgp_rx_end_mark(p, s.mp_unreach_af); goto done; }
+ !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af)
+ { bgp_rx_end_mark(&s, s.mp_unreach_af); goto done; }
if (s.ip_unreach_len)
bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0);