diff options
author | Jan Moskyto Matejka <mq@ucw.cz> | 2017-02-22 11:58:04 +0100 |
---|---|---|
committer | Jan Moskyto Matejka <mq@ucw.cz> | 2017-02-22 11:58:04 +0100 |
commit | c609d039860f97f400d2cf0e9ca2b4e87b3fd1cc (patch) | |
tree | 6141291f6d6fbc0a90320f39c01bde49a119eadf /proto/bgp | |
parent | 62e64905b76b88da72c522eac9276a74f60c9592 (diff) | |
parent | 2be9218a3b1dfcc8e42c8d118e95f2074d9f7a7c (diff) |
Merge branch 'int-new' into nexthop-merged
Diffstat (limited to 'proto/bgp')
-rw-r--r-- | proto/bgp/attrs.c | 155 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 154 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 43 | ||||
-rw-r--r-- | proto/bgp/config.Y | 6 | ||||
-rw-r--r-- | proto/bgp/packets.c | 299 |
5 files changed, 542 insertions, 115 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 1b124a17..f2a8e8b5 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -41,24 +41,6 @@ * specifies that such updates should be ignored, but that is generally * a bad idea. * - * Error checking of optional transitive attributes is done according to - * draft-ietf-idr-optional-transitive-03, but errors are handled always - * as withdraws. - * - * Unexpected AS_CONFED_* segments in AS_PATH are logged and removed, - * but unknown segments cause a session drop with Malformed AS_PATH - * error (see validate_path()). The behavior in such case is not - * explicitly specified by RFC 4271. RFC 5065 specifies that - * inconsistent AS_CONFED_* segments should cause a session drop, but - * implementations that pass invalid AS_CONFED_* segments are - * widespread. - * - * Error handling of AS4_* attributes is done as specified by - * draft-ietf-idr-rfc4893bis-03. There are several possible - * inconsistencies between AGGREGATOR and AS4_AGGREGATOR that are not - * handled by that draft, these are logged and ignored (see - * bgp_reconstruct_4b_attrs()). - * * BGP attribute table has several hooks: * * export - Hook that validates and normalizes attribute during export phase. @@ -281,11 +263,19 @@ bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size) static void bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { + struct bgp_proto *p = s->proto; + int as_length = s->as4_session ? 4 : 2; + int as_confed = p->cf->confederation && p->is_interior; char err[128]; - if (!as_path_valid(data, len, (s->as4_session ? 4 : 2), err, sizeof(err))) + if (!as_path_valid(data, len, as_length, as_confed, err, sizeof(err))) WITHDRAW("Malformed AS_PATH attribute - %s", err); + /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */ + if (p->is_interior && !p->is_internal && + ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE))) + WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE"); + if (!s->as4_session) { /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */ @@ -603,11 +593,20 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt if (len < 6) DISCARD(BAD_LENGTH, "AS4_PATH", len); - if (!as_path_valid(data, len, 4, err, sizeof(err))) + if (!as_path_valid(data, len, 4, 1, err, sizeof(err))) DISCARD("Malformed AS4_PATH attribute - %s", err); - /* XXXX remove CONFED segments */ - bgp_set_attr_data(to, s->pool, BA_AS4_PATH, flags, data, len); + struct adata *a = lp_alloc_adata(s->pool, len); + memcpy(a->data, data, len); + + /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */ + if (as_path_contains_confed(a)) + { + REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute"); + a = as_path_strip_confed(s->pool, a); + } + + bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a); } static void @@ -1042,7 +1041,7 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) if (bgp_as_path_loopy(p, attrs, p->local_as)) goto withdraw; - /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4 */ + /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */ if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as)) goto withdraw; @@ -1221,8 +1220,17 @@ bgp_init_prefix_table(struct bgp_channel *c) { HASH_INIT(c->prefix_hash, c->pool, 8); - c->prefix_slab = sl_new(c->pool, sizeof(struct bgp_prefix) + - net_addr_length[c->c.net_type]); + uint alen = net_addr_length[c->c.net_type]; + c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL; +} + +void +bgp_free_prefix_table(struct bgp_channel *c) +{ + HASH_FREE(c->prefix_hash); + + rfree(c->prefix_slab); + c->prefix_slab = NULL; } static struct bgp_prefix * @@ -1237,7 +1245,11 @@ bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id) return px; } - px = sl_alloc(c->prefix_slab); + if (c->prefix_slab) + px = sl_alloc(c->prefix_slab); + else + px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length); + px->buck_node.next = NULL; px->buck_node.prev = NULL; px->hash = hash; @@ -1254,7 +1266,11 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) { rem_node(&px->buck_node); HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px); - sl_free(c->prefix_slab, px); + + if (c->prefix_slab) + sl_free(c->prefix_slab, px); + else + mb_free(px); } @@ -1278,6 +1294,8 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li if (src == NULL) return 0; + // XXXX: Check next hop AF + /* IBGP route reflection, RFC 4456 */ if (p->is_internal && src->is_internal && (p->local_as == src->local_as)) { @@ -1314,82 +1332,86 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li return 0; } -static const adata null_adata; /* adata of length 0 */ -static inline void -bgp_path_prepend(ea_list **attrs, struct linpool *pool, int seg, u32 as, int strip) -{ - eattr *a = bgp_find_attr(*attrs, BA_AS_PATH); - adata *d = as_path_prepend2(pool, a ? a->u.ptr : &null_adata, seg, as, strip); - bgp_set_attr_ptr(attrs, pool, BA_AS_PATH, 0, d); -} - -static inline void -bgp_cluster_list_prepend(ea_list **attrs, struct linpool *pool, u32 id) -{ - eattr *a = bgp_find_attr(*attrs, BA_CLUSTER_LIST); - adata *d = int_set_add(pool, a ? a->u.ptr : NULL, id); - bgp_set_attr_ptr(attrs, pool, BA_CLUSTER_LIST, 0, d); -} +static adata null_adata; /* adata of length 0 */ static ea_list * -bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs, struct linpool *pool) +bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool) { struct proto *SRC = e->attrs->src->proto; struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL; struct bgp_export_state s = { .proto = p, .channel =c, .pool = pool, .src = src, .route = e }; + ea_list *attrs = attrs0; eattr *a; + adata *ad; /* ORIGIN attribute - mandatory, attach if missing */ - if (! bgp_find_attr(attrs, BA_ORIGIN)) + if (! bgp_find_attr(attrs0, BA_ORIGIN)) bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP); + /* AS_PATH attribute - mandatory */ + a = bgp_find_attr(attrs0, BA_AS_PATH); + ad = a ? a->u.ptr : &null_adata; + + /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */ + if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad)) + ad = as_path_strip_confed(pool, ad); + /* AS_PATH attribute - keep or prepend ASN */ if (p->is_internal || (p->rs_client && src && src->rs_client)) { /* IBGP or route server -> just ensure there is one */ - if (! bgp_find_attr(attrs, BA_AS_PATH)) - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, lp_alloc_adata(pool, 0)); + if (!a) + bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata); } else if (p->is_interior) { - /* Confederation -> prepend ASN as CONFED_SEQUENCE, keep CONFED_* segments */ - bgp_path_prepend(&attrs, pool, AS_PATH_CONFED_SEQUENCE, p->public_as, 0); + /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */ + ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as); + bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); } else /* Regular EBGP (no RS, no confederation) */ { - /* Regular EBGP -> prepend ASN as regular segment, strip CONFED_* segments */ - bgp_path_prepend(&attrs, pool, AS_PATH_SEQUENCE, p->public_as, 1); + /* Regular EBGP -> prepend ASN as regular sequence */ + ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as); + bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); /* MULTI_EXIT_DESC attribute - accept only if set in export filter */ - a = bgp_find_attr(attrs, BA_MULTI_EXIT_DISC); + a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC); if (a && !(a->type & EAF_FRESH)) bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC); } /* NEXT_HOP attribute - delegated to AF-specific hook */ - a = bgp_find_attr(attrs, BA_NEXT_HOP); + a = bgp_find_attr(attrs0, BA_NEXT_HOP); bgp_update_next_hop(&s, a, &attrs); /* LOCAL_PREF attribute - required for IBGP, attach if missing */ - if (p->is_interior && ! bgp_find_attr(attrs, BA_LOCAL_PREF)) + if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF)) bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); /* IBGP route reflection, RFC 4456 */ if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as)) { /* ORIGINATOR_ID attribute - attach if not already set */ - if (! bgp_find_attr(attrs, BA_ORIGINATOR_ID)) + if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID)) bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id); /* CLUSTER_LIST attribute - prepend cluster ID */ + a = bgp_find_attr(attrs0, BA_CLUSTER_LIST); + ad = a ? a->u.ptr : NULL; + + /* Prepend src cluster ID */ if (src->rr_cluster_id) - bgp_cluster_list_prepend(&attrs, pool, src->rr_cluster_id); + ad = int_set_prepend(pool, ad, src->rr_cluster_id); - /* Handle different src and dst cluster ID - prepend both ones */ + /* Prepend dst cluster ID if src and dst clusters are different */ if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id)) - bgp_cluster_list_prepend(&attrs, pool, p->rr_cluster_id); + ad = int_set_prepend(pool, ad, p->rr_cluster_id); + + /* Should be at least one prepended cluster ID */ + bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad); } /* AS4_* transition attributes, RFC 6793 4.2.2 */ @@ -1410,6 +1432,12 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at } } + /* + * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above + * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute + * should be checked in AF-specific hooks. + */ + /* Apply per-attribute export hooks for validatation and normalization */ return bgp_export_attrs(&s, attrs); } @@ -1452,10 +1480,12 @@ bgp_get_neighbor(rte *r) eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); u32 as; - if (e && as_path_get_first(e->u.ptr, &as)) + if (e && as_path_get_first_regular(e->u.ptr, &as)) return as; - else - return ((struct bgp_proto *) r->attrs->src->proto)->remote_as; + + /* If AS_PATH is not defined, we treat rte as locally originated */ + struct bgp_proto *p = (void *) r->attrs->src->proto; + return p->cf->confederation ?: p->local_as; } static inline int @@ -1653,7 +1683,7 @@ bgp_rte_mergable(rte *pri, rte *sec) } /* RFC 4271 9.1.2.2. d) Prefer external peers */ - if (pri_bgp->is_internal != sec_bgp->is_internal) + if (pri_bgp->is_interior != sec_bgp->is_interior) return 0; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ @@ -1843,6 +1873,7 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) /* Handle AS_PATH attribute */ if (p2 && p4) { + /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */ int p2_len = as_path_getlen(p2->u.ptr); int p4_len = as_path_getlen(p4->u.ptr); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 2ca153ab..5e95e6b4 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -521,12 +521,17 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) if (peer->gr_aware) c->load_state = BFS_LOADING; + c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop); c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX); c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX); - // XXXX reset back to non-ANY? + /* Update RA mode */ if (c->add_path_tx) c->c.ra_mode = RA_ANY; + else if (c->cf->secondary) + c->c.ra_mode = RA_ACCEPTED; + else + c->c.ra_mode = RA_OPTIMAL; } p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32)); @@ -554,6 +559,10 @@ bgp_conn_leave_established_state(struct bgp_proto *p) BGP_TRACE(D_EVENTS, "BGP session closed"); p->conn = NULL; + // XXXX free these tables to avoid memory leak during graceful restart + // bgp_free_prefix_table(p); + // bgp_free_bucket_table(p); + if (p->p.proto_state == PS_UP) bgp_stop(p, 0); } @@ -1411,8 +1420,6 @@ bgp_channel_init(struct channel *C, struct channel_config *CF) struct bgp_channel *c = (void *) C; struct bgp_channel_config *cf = (void *) CF; - C->ra_mode = cf->secondary ? RA_ACCEPTED : RA_OPTIMAL; - c->cf = cf; c->afi = cf->afi; c->desc = bgp_get_af_desc(c->afi); @@ -1757,10 +1764,131 @@ bgp_get_status(struct proto *P, byte *buf) } static void +bgp_show_afis(int code, char *s, u32 *afis, uint count) +{ + buffer b; + LOG_BUFFER_INIT(b); + + buffer_puts(&b, s); + + for (u32 *af = afis; af < (afis + count); af++) + { + const struct bgp_af_desc *desc = bgp_get_af_desc(*af); + if (desc) + buffer_print(&b, " %s", desc->name); + else + buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af)); + } + + if (b.pos == b.end) + strcpy(b.end - 32, " ... <too long>"); + + cli_msg(code, b.start); +} + +static void +bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps) +{ + struct bgp_af_caps *ac; + uint any_mp_bgp = 0; + uint any_gr_able = 0; + uint any_add_path = 0; + uint any_ext_next_hop = 0; + u32 *afl1 = alloca(caps->af_count * sizeof(u32)); + u32 *afl2 = alloca(caps->af_count * sizeof(u32)); + uint afn1, afn2; + + WALK_AF_CAPS(caps, ac) + { + any_mp_bgp |= ac->ready; + any_gr_able |= ac->gr_able; + any_add_path |= ac->add_path; + any_ext_next_hop |= ac->ext_next_hop; + } + + if (any_mp_bgp) + { + cli_msg(-1006, " Multiprotocol"); + + afn1 = 0; + WALK_AF_CAPS(caps, ac) + if (ac->ready) + afl1[afn1++] = ac->afi; + + bgp_show_afis(-1006, " AF announced:", afl1, afn1); + } + + if (caps->route_refresh) + cli_msg(-1006, " Route refresh"); + + if (any_ext_next_hop) + { + cli_msg(-1006, " Extended next hop"); + + afn1 = 0; + WALK_AF_CAPS(caps, ac) + if (ac->ext_next_hop) + afl1[afn1++] = ac->afi; + + bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1); + } + + if (caps->ext_messages) + cli_msg(-1006, " Extended message"); + + if (caps->gr_aware) + cli_msg(-1006, " Graceful restart"); + + if (any_gr_able) + { + /* Continues from gr_aware */ + cli_msg(-1006, " Restart time: %u", caps->gr_time); + if (caps->gr_flags & BGP_GRF_RESTART) + cli_msg(-1006, " Restart recovery"); + + afn1 = afn2 = 0; + WALK_AF_CAPS(caps, ac) + { + if (ac->gr_able) + afl1[afn1++] = ac->afi; + + if (ac->gr_af_flags & BGP_GRF_FORWARDING) + afl2[afn2++] = ac->afi; + } + + bgp_show_afis(-1006, " AF supported:", afl1, afn1); + bgp_show_afis(-1006, " AF preserved:", afl2, afn2); + } + + if (caps->as4_support) + cli_msg(-1006, " 4-octet AS numbers"); + + if (any_add_path) + { + cli_msg(-1006, " ADD-PATH"); + + afn1 = afn2 = 0; + WALK_AF_CAPS(caps, ac) + { + if (ac->add_path & BGP_ADD_PATH_RX) + afl1[afn1++] = ac->afi; + + if (ac->add_path & BGP_ADD_PATH_TX) + afl2[afn2++] = ac->afi; + } + + bgp_show_afis(-1006, " RX:", afl1, afn1); + bgp_show_afis(-1006, " TX:", afl2, afn2); + } + + if (caps->enhanced_refresh) + cli_msg(-1006, " Enhanced refresh"); +} + +static void bgp_show_proto_info(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; - struct bgp_conn *c = p->conn; cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p)); cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface); @@ -1789,15 +1917,11 @@ bgp_show_proto_info(struct proto *P) else if (P->proto_state == PS_UP) { cli_msg(-1006, " Neighbor ID: %R", p->remote_id); + cli_msg(-1006, " Local capabilities"); + bgp_show_capabilities(p, p->conn->local_caps); + cli_msg(-1006, " Neighbor capabilities"); + bgp_show_capabilities(p, p->conn->remote_caps); /* XXXX - cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s%s", - c->peer_refresh_support ? " refresh" : "", - c->peer_enhanced_refresh_support ? " enhanced-refresh" : "", - c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""), - c->peer_as4_support ? " AS4" : "", - (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "", - (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "", - c->peer_ext_messages_support ? " ext-messages" : ""); cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s", p->is_internal ? "internal" : "external", p->cf->multihop ? " multihop" : "", @@ -1810,9 +1934,9 @@ bgp_show_proto_info(struct proto *P) */ cli_msg(-1006, " Source address: %I", p->source_addr); cli_msg(-1006, " Hold timer: %d/%d", - tm_remains(c->hold_timer), c->hold_time); + tm_remains(p->conn->hold_timer), p->conn->hold_time); cli_msg(-1006, " Keepalive timer: %d/%d", - tm_remains(c->keepalive_timer), c->keepalive_time); + tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time); } if ((p->last_error_class != BE_NONE) && @@ -1846,7 +1970,7 @@ struct protocol proto_bgp = { .template = "bgp%d", .attr_class = EAP_BGP, .preference = DEF_PREF_BGP, - .channel_mask = NB_IP, + .channel_mask = NB_IP | NB_FLOW4 | NB_FLOW6, .proto_size = sizeof(struct bgp_proto), .config_size = sizeof(struct bgp_config), .postconfig = bgp_postconfig, diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 4ecb86a0..5d2539d5 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -31,6 +31,7 @@ struct eattr; #define BGP_SAFI_UNICAST 1 #define BGP_SAFI_MULTICAST 2 +#define BGP_SAFI_FLOW 133 /* Internal AF codes */ @@ -42,6 +43,8 @@ struct eattr; #define BGP_AF_IPV6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_UNICAST ) #define BGP_AF_IPV4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MULTICAST ) #define BGP_AF_IPV6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MULTICAST ) +#define BGP_AF_FLOW4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_FLOW ) +#define BGP_AF_FLOW6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_FLOW ) struct bgp_write_state; @@ -70,7 +73,7 @@ struct bgp_config { u16 local_port; /* Local listening port */ u16 remote_port; /* Neighbor destination port */ int multihop; /* Number of hops if multihop */ - int strict_bind; /* Bind listening socket to local address XXXX */ + int strict_bind; /* Bind listening socket to local address */ int ttl_security; /* Enable TTL security [RFC 5082] */ int compare_path_lengths; /* Use path lengths when selecting best route */ int med_metric; /* Compare MULTI_EXIT_DISC even between routes from differen ASes */ @@ -120,6 +123,7 @@ struct bgp_channel_config { u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */ u8 gr_able; /* Allow full graceful restart for the channel */ + u8 ext_next_hop; /* Allow both IPv4 and IPv6 next hops */ u8 add_path; /* Use ADD-PATH extension [RFC 7911] */ struct rtable_config *igp_table; /* Table used for recursive next hop lookups */ @@ -151,6 +155,7 @@ struct bgp_af_caps { u8 ready; /* Multiprotocol capability, RFC 4760 */ u8 gr_able; /* Graceful restart support, RFC 4724 */ u8 gr_af_flags; /* Graceful restart per-AF flags */ + u8 ext_next_hop; /* Extended IPv6 next hop, RFC 5549 */ u8 add_path; /* Multiple paths support, RFC 7911 */ }; @@ -171,6 +176,10 @@ struct bgp_caps { struct bgp_af_caps af_data[0]; /* Per-AF capability data */ }; +#define WALK_AF_CAPS(caps,ac) \ + for (ac = caps->af_data; ac < &caps->af_data[caps->af_count]; ac++) + + struct bgp_socket { node n; /* Node in global bgp_sockets */ sock *sk; /* Real listening socket */ @@ -267,6 +276,8 @@ struct bgp_channel { u8 gr_ready; /* Neighbor could do GR on this AF */ u8 gr_active; /* Neighbor is doing GR and keeping fwd state */ + u8 ext_next_hop; /* Session allows both IPv4 and IPv6 next hops */ + u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ @@ -446,18 +457,6 @@ bgp_unset_attr(ea_list **to, struct linpool *pool, uint code) { eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; } - - -/* Hack: although BA_NEXT_HOP attribute has type EAF_TYPE_IP_ADDRESS, in IPv6 - * we store two addesses in it - a global address and a link local address. - */ -#ifdef XXX -#define NEXT_HOP_LENGTH (2*sizeof(ip_addr)) -static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; ((ip_addr *) b)[1] = IPA_NONE; } -#define NEXT_HOP_LENGTH sizeof(ip_addr) -static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; } -#endif - int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end); ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len); @@ -510,26 +509,22 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); #define BAF_PARTIAL 0x20 #define BAF_EXT_LEN 0x10 -#define BA_ORIGIN 0x01 /* [RFC1771] */ /* WM */ +#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */ #define BA_AS_PATH 0x02 /* WM */ #define BA_NEXT_HOP 0x03 /* WM */ #define BA_MULTI_EXIT_DISC 0x04 /* ON */ #define BA_LOCAL_PREF 0x05 /* WD */ #define BA_ATOMIC_AGGR 0x06 /* WD */ #define BA_AGGREGATOR 0x07 /* OT */ -#define BA_COMMUNITY 0x08 /* [RFC1997] */ /* OT */ -#define BA_ORIGINATOR_ID 0x09 /* [RFC1966] */ /* ON */ -#define BA_CLUSTER_LIST 0x0a /* ON */ -/* We don't support these: */ -#define BA_DPA 0x0b /* ??? */ -#define BA_ADVERTISER 0x0c /* [RFC1863] */ -#define BA_RCID_PATH 0x0d -#define BA_MP_REACH_NLRI 0x0e /* [RFC2283] */ -#define BA_MP_UNREACH_NLRI 0x0f +#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */ +#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */ +#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */ +#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */ +#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */ #define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */ #define BA_AS4_PATH 0x11 /* RFC 6793 */ #define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */ -#define BA_LARGE_COMMUNITY 0x20 /* [draft-ietf-idr-large-community] */ +#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */ /* BGP connection states */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 10a338d8..8c63b331 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -28,7 +28,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, - STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST) + STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6) %type <i32> bgp_afi @@ -139,6 +139,8 @@ bgp_afi: | IPV6 { $$ = BGP_AF_IPV6; } | IPV4 MULTICAST { $$ = BGP_AF_IPV4_MC; } | IPV6 MULTICAST { $$ = BGP_AF_IPV6_MC; } + | FLOW4 { $$ = BGP_AF_FLOW4; } + | FLOW6 { $$ = BGP_AF_FLOW6; } ; bgp_channel_start: bgp_afi @@ -150,6 +152,7 @@ bgp_channel_start: bgp_afi this_channel = channel_config_new(&channel_bgp, desc->net, this_proto); BGP_CC->c.name = desc->name; + BGP_CC->c.ra_mode = RA_UNDEF; BGP_CC->afi = $1; BGP_CC->gr_able = 0xff; /* undefined */ }; @@ -166,6 +169,7 @@ bgp_channel_item: | GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; } | SECONDARY bool { BGP_CC->secondary = $2; } | GRACEFUL RESTART bool { BGP_CC->gr_able = $3; } + | EXTENDED NEXT HOP bool { BGP_CC->ext_next_hop = $4; } | ADD PATHS RX { BGP_CC->add_path = BGP_ADD_PATH_RX; } | ADD PATHS TX { BGP_CC->add_path = BGP_ADD_PATH_TX; } | ADD PATHS bool { BGP_CC->add_path = $3 ? BGP_ADD_PATH_FULL : 0; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 9380f999..2106e0d1 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -20,6 +20,7 @@ #include "nest/mrtdump.h" #include "conf/conf.h" #include "lib/unaligned.h" +#include "lib/flowspec.h" #include "lib/socket.h" #include "nest/cli.h" @@ -184,9 +185,6 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf) /* Capability negotiation as per RFC 5492 */ -#define WALK_AF_CAPS(caps,ac) \ - for (ac = caps->af_data; ac < &caps->af_data[caps->af_count]; ac++) - const struct bgp_af_caps * bgp_find_af_caps(struct bgp_caps *caps, u32 afi) { @@ -230,6 +228,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) struct bgp_channel *c; struct bgp_caps *caps; struct bgp_af_caps *ac; + uint any_ext_next_hop = 0; uint any_add_path = 0; byte *data; @@ -261,6 +260,9 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) ac->afi = c->afi; ac->ready = 1; + ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop; + any_ext_next_hop |= ac->ext_next_hop; + ac->add_path = c->cf->add_path; any_add_path |= ac->add_path; @@ -279,6 +281,12 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) /* Create capability list in buffer */ + /* + * Note that max length is ~ 20+14*af_count. With max 6 channels that is + * 104. Option limit is 253 and buffer size is 4096, so we cannot overflow + * unless we add new capabilities or more AFs. + */ + WALK_AF_CAPS(caps, ac) if (ac->ready) { @@ -294,6 +302,23 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) *buf++ = 0; /* Capability data length */ } + if (any_ext_next_hop) + { + *buf++ = 5; /* Capability 5: Support for extended next hop */ + *buf++ = 0; /* Capability data length, will be fixed later */ + data = buf; + + WALK_AF_CAPS(caps, ac) + if (ac->ext_next_hop) + { + put_af4(buf, ac->afi); + put_u16(buf+4, BGP_AFI_IPV6); + buf += 6; + } + + data[-1] = buf - data; + } + if (caps->ext_messages) { *buf++ = 6; /* Capability 6: Support for extended messages */ @@ -352,8 +377,6 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) *buf++ = 0; /* Capability data length */ } - /* FIXME: Should not XXXX 255 */ - return buf; } @@ -392,6 +415,23 @@ bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, i caps->route_refresh = 1; break; + case 5: /* Extended next hop encoding capability, RFC 5549 */ + if (cl % 6) + goto err; + + for (i = 0; i < cl; i += 6) + { + /* Specified only for IPv4 prefixes with IPv6 next hops */ + if ((get_u16(pos+2+i+0) != BGP_AFI_IPV4) || + (get_u16(pos+2+i+4) != BGP_AFI_IPV6)) + continue; + + af = get_af4(pos+2+i); + ac = bgp_get_af_caps(caps, af); + ac->ext_next_hop = 1; + } + break; + case 6: /* Extended message length capability, RFC draft */ if (cl != 0) goto err; @@ -673,9 +713,13 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) #define REPORT(msg, args...) \ ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); }) +#define DISCARD(msg, args...) \ + ({ REPORT(msg, ## args); return; }) + #define WITHDRAW(msg, args...) \ ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) +#define BAD_AFI "Unexpected AF <%u/%u> in UPDATE" #define BAD_NEXT_HOP "Invalid NEXT_HOP attribute" #define NO_NEXT_HOP "Missing NEXT_HOP attribute" @@ -792,6 +836,32 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) WITHDRAW(BAD_NEXT_HOP); } +static uint +bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED) +{ + return 0; +} + +static void +bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED) +{ + /* + * Although we expect no next hop and RFC 7606 7.11 states that attribute + * MP_REACH_NLRI with unexpected next hop length is considered malformed, + * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt. + */ + + return; +} + +static void +bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to) +{ + /* NEXT_HOP shall not pass */ + if (a) + bgp_unset_attr(to, s->pool, BA_NEXT_HOP); +} + /* * UPDATE @@ -1065,6 +1135,190 @@ bgp_decode_next_hop_ip6(struct bgp_parse_state *s, byte *data, uint len, rta *a) } +static uint +bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= 4)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_flow4 *net = (void *) px->net; + uint flen = net->length - sizeof(net_addr_flow4); + + /* Encode path ID */ + if (s->add_path) + { + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); + } + + if (flen > size) + break; + + /* Copy whole flow data including length */ + memcpy(pos, net->data, flen); + ADVANCE(pos, size, flen); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static void +bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) + { + if (len < 4) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); + } + + if (len < 2) + bgp_parse_error(s, 1); + + /* Decode flow length */ + uint hlen = flow_hdr_length(pos); + uint dlen = flow_read_length(pos); + uint flen = hlen + dlen; + byte *data = pos + hlen; + + if (len < flen) + bgp_parse_error(s, 1); + + /* Validate flow data */ + enum flow_validated_state r = flow4_validate(data, dlen); + if (r != FLOW_ST_VALID) + { + log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r)); + bgp_parse_error(s, 1); + } + + if (data[0] != FLOW_TYPE_DST_PREFIX) + { + log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name); + bgp_parse_error(s, 1); + } + + /* Decode dst prefix */ + ip4_addr px = IP4_NONE; + uint pxlen = data[1]; + + // FIXME: Use some generic function + memcpy(&px, data, BYTES(pxlen)); + px = ip4_and(px, ip4_mkmask(pxlen)); + + /* Prepare the flow */ + net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen); + net_fill_flow4(n, px, pxlen, pos, flen); + ADVANCE(pos, len, flen); + + bgp_rte_update(s, n, path_id, a); + } +} + + +static uint +bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= 4)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_flow6 *net = (void *) px->net; + uint flen = net->length - sizeof(net_addr_flow6); + + /* Encode path ID */ + if (s->add_path) + { + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); + } + + if (flen > size) + break; + + /* Copy whole flow data including length */ + memcpy(pos, net->data, flen); + ADVANCE(pos, size, flen); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static void +bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) + { + if (len < 4) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); + } + + if (len < 2) + bgp_parse_error(s, 1); + + /* Decode flow length */ + uint hlen = flow_hdr_length(pos); + uint dlen = flow_read_length(pos); + uint flen = hlen + dlen; + byte *data = pos + hlen; + + if (len < flen) + bgp_parse_error(s, 1); + + /* Validate flow data */ + enum flow_validated_state r = flow6_validate(data, dlen); + if (r != FLOW_ST_VALID) + { + log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r)); + bgp_parse_error(s, 1); + } + + if (data[0] != FLOW_TYPE_DST_PREFIX) + { + log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name); + bgp_parse_error(s, 1); + } + + /* Decode dst prefix */ + ip6_addr px = IP6_NONE; + uint pxlen = data[1]; + + // FIXME: Use some generic function + memcpy(&px, data, BYTES(pxlen)); + px = ip6_and(px, ip6_mkmask(pxlen)); + + /* Prepare the flow */ + net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen); + net_fill_flow6(n, px, pxlen, pos, flen); + ADVANCE(pos, len, flen); + + bgp_rte_update(s, n, path_id, a); + } +} + + static const struct bgp_af_desc bgp_af_table[] = { { .afi = BGP_AF_IPV4, @@ -1087,6 +1341,16 @@ static const struct bgp_af_desc bgp_af_table[] = { .update_next_hop = bgp_update_next_hop_ip, }, { + .afi = BGP_AF_FLOW4, + .net = NET_FLOW4, + .name = "flow4", + .encode_nlri = bgp_encode_nlri_flow4, + .decode_nlri = bgp_decode_nlri_flow4, + .encode_next_hop = bgp_encode_next_hop_none, + .decode_next_hop = bgp_decode_next_hop_none, + .update_next_hop = bgp_update_next_hop_none, + }, + { .afi = BGP_AF_IPV6, .net = NET_IP6, .name = "ipv6", @@ -1106,6 +1370,16 @@ static const struct bgp_af_desc bgp_af_table[] = { .decode_next_hop = bgp_decode_next_hop_ip6, .update_next_hop = bgp_update_next_hop_ip, }, + { + .afi = BGP_AF_FLOW6, + .net = NET_FLOW6, + .name = "flow6", + .encode_nlri = bgp_encode_nlri_flow6, + .decode_nlri = bgp_decode_nlri_flow6, + .encode_next_hop = bgp_encode_next_hop_none, + .decode_next_hop = bgp_decode_next_hop_none, + .update_next_hop = bgp_update_next_hop_none, + }, }; const struct bgp_af_desc * @@ -1387,15 +1661,15 @@ bgp_create_end_mark(struct bgp_channel *c, byte *buf) } static inline void -bgp_rx_end_mark(struct bgp_proto *p, u32 afi) +bgp_rx_end_mark(struct bgp_parse_state *s, u32 afi) { + struct bgp_proto *p = s->proto; struct bgp_channel *c = bgp_get_channel(p, afi); BGP_TRACE(D_PACKETS, "Got END-OF-RIB"); - /* XXXX handle unknown AF in MP_*_NLRI */ if (!c) - return; + DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi)); if (c->load_state == BFS_LOADING) c->load_state = BFS_NONE; @@ -1413,9 +1687,8 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis struct bgp_channel *c = bgp_get_channel(s->proto, afi); rta *a = NULL; - /* XXXX handle unknown AF in MP_*_NLRI */ if (!c) - return; + DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi)); s->channel = c; s->add_path = c->add_path_rx; @@ -1523,12 +1796,12 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) /* Check for End-of-RIB marker */ if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len) - { bgp_rx_end_mark(p, BGP_AF_IPV4); goto done; } + { bgp_rx_end_mark(&s, BGP_AF_IPV4); goto done; } /* Check for MP End-of-RIB marker */ if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len && - !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af) /* XXXX See RFC 7606 5.2 */ - { bgp_rx_end_mark(p, s.mp_unreach_af); goto done; } + !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af) + { bgp_rx_end_mark(&s, s.mp_unreach_af); goto done; } if (s.ip_unreach_len) bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0); |