diff options
Diffstat (limited to 'proto/bgp')
-rw-r--r-- | proto/bgp/attrs.c | 4 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 232 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 27 | ||||
-rw-r--r-- | proto/bgp/config.Y | 22 | ||||
-rw-r--r-- | proto/bgp/packets.c | 121 |
5 files changed, 318 insertions, 88 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 7c6f2ee9..69c4b172 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1302,7 +1302,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash -#define PXH_PARAMS /8, *2, 2, 2, 8, 20 +#define PXH_PARAMS /8, *2, 2, 2, 8, 24 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) @@ -1730,7 +1730,7 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */ - return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0); + return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0; } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 8dedde9f..b68575a5 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -129,6 +129,9 @@ static list bgp_sockets; /* Global list of listening sockets */ static void bgp_connect(struct bgp_proto *p); static void bgp_active(struct bgp_proto *p); +static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn); +static void bgp_setup_sk(struct bgp_conn *conn, sock *s); +static void bgp_send_open(struct bgp_conn *conn); static void bgp_update_bfd(struct bgp_proto *p, int use_bfd); static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); @@ -149,7 +152,7 @@ bgp_open(struct bgp_proto *p) struct bgp_socket *bs = NULL; struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : - (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6); + (p->ipv4 ? IPA_NONE4 : IPA_NONE6); uint port = p->cf->local_port; /* FIXME: Add some global init? */ @@ -272,8 +275,17 @@ bgp_startup(struct bgp_proto *p) BGP_TRACE(D_EVENTS, "Started"); p->start_state = BSS_CONNECT; - if (!p->cf->passive) + if (!p->passive) bgp_active(p); + + if (p->postponed_sk) + { + /* Apply postponed incoming connection */ + bgp_setup_conn(p, &p->incoming_conn); + bgp_setup_sk(&p->incoming_conn, p->postponed_sk); + bgp_send_open(&p->incoming_conn); + p->postponed_sk = NULL; + } } static void @@ -387,7 +399,7 @@ bgp_close_conn(struct bgp_conn *conn) void bgp_update_startup_delay(struct bgp_proto *p) { - struct bgp_config *cf = p->cf; + const struct bgp_config *cf = p->cf; DBG("BGP: Updating startup delay\n"); @@ -410,7 +422,7 @@ bgp_update_startup_delay(struct bgp_proto *p) } static void -bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len) +bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len) { switch (conn->state) { @@ -426,7 +438,13 @@ bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint le case BS_OPENSENT: case BS_OPENCONFIRM: case BS_ESTABLISHED: - bgp_error(conn, 6, subcode, data, len); + if (subcode < 0) + { + bgp_conn_enter_close_state(conn); + bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE); + } + else + bgp_error(conn, 6, subcode, data, len); return; default: @@ -456,7 +474,7 @@ bgp_decision(void *vp) if ((p->p.proto_state == PS_START) && (p->outgoing_conn.state == BS_IDLE) && (p->incoming_conn.state != BS_OPENCONFIRM) && - !p->cf->passive) + !p->passive) bgp_active(p); if ((p->p.proto_state == PS_STOP) && @@ -465,8 +483,31 @@ bgp_decision(void *vp) bgp_down(p); } +static struct bgp_proto * +bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip) +{ + struct symbol *sym; + char fmt[SYM_MAX_LEN]; + + bsprintf(fmt, "%s%%0%dd", pp->cf->dynamic_name, pp->cf->dynamic_name_digits); + + /* This is hack, we would like to share config, but we need to copy it now */ + new_config = config; + cfg_mem = config->mem; + conf_this_scope = config->root_scope; + sym = cf_default_name(fmt, &(pp->dynamic_name_counter)); + proto_clone_config(sym, pp->p.cf); + new_config = NULL; + cfg_mem = NULL; + + /* Just pass remote_ip to bgp_init() */ + ((struct bgp_config *) sym->proto)->remote_ip = remote_ip; + + return (void *) proto_spawn(sym->proto, 0); +} + void -bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len) +bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len) { proto_notify_state(&p->p, PS_STOP); bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len); @@ -491,6 +532,7 @@ bgp_conn_enter_openconfirm_state(struct bgp_conn *conn) } static const struct bgp_af_caps dummy_af_caps = { }; +static const struct bgp_af_caps basic_af_caps = { .ready = 1 }; void bgp_conn_enter_established_state(struct bgp_conn *conn) @@ -503,8 +545,12 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) BGP_TRACE(D_EVENTS, "BGP session established"); /* For multi-hop BGP sessions */ - if (ipa_zero(p->source_addr)) - p->source_addr = conn->sk->saddr; + if (ipa_zero(p->local_ip)) + p->local_ip = conn->sk->saddr; + + /* For promiscuous sessions */ + if (!p->remote_as) + p->remote_as = conn->received_as; /* In case of LLv6 is not valid during BGP start */ if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6) @@ -541,6 +587,13 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi); + /* Use default if capabilities were not announced */ + if (!local->length && (c->afi == BGP_AF_IPV4)) + loc = &basic_af_caps; + + if (!peer->length && (c->afi == BGP_AF_IPV4)) + rem = &basic_af_caps; + /* Ignore AFIs that were not announced in multiprotocol capability */ if (!loc || !loc->ready) loc = &dummy_af_caps; @@ -880,6 +933,7 @@ bgp_send_open(struct bgp_conn *conn) conn->sk->rx_hook = bgp_rx; conn->sk->tx_hook = bgp_tx; tm_stop(conn->connect_timer); + bgp_prepare_capabilities(conn); bgp_schedule_packet(conn, NULL, PKT_OPEN); bgp_conn_set_state(conn, BS_OPENSENT); bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time); @@ -1039,8 +1093,8 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c DBG("BGP: Connecting\n"); sock *s = sk_new(p->p.pool); s->type = SK_TCP_ACTIVE; - s->saddr = p->source_addr; - s->daddr = p->cf->remote_ip; + s->saddr = p->local_ip; + s->daddr = p->remote_ip; s->dport = p->cf->remote_port; s->iface = p->neigh ? p->neigh->iface : NULL; s->vrf = p->p.vrf; @@ -1075,6 +1129,9 @@ err: return; } +static inline int bgp_is_dynamic(struct bgp_proto *p) +{ return ipa_zero(p->remote_ip); } + /** * bgp_find_proto - find existing proto for incoming connection * @sk: TCP socket @@ -1083,6 +1140,7 @@ err: static struct bgp_proto * bgp_find_proto(sock *sk) { + struct bgp_proto *best = NULL; struct bgp_proto *p; /* sk->iface is valid only if src or dst address is link-local */ @@ -1090,13 +1148,20 @@ bgp_find_proto(sock *sk) WALK_LIST(p, proto_list) if ((p->p.proto == &proto_bgp) && - (p->sock == sk->data) && - ipa_equal(p->cf->remote_ip, sk->daddr) && + (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) && + (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) && + (p->p.vrf == sk->vrf) && + (p->cf->local_port == sk->sport) && (!link || (p->cf->iface == sk->iface)) && (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr))) - return p; + { + best = p; - return NULL; + if (!bgp_is_dynamic(p)) + break; + } + + return best; } /** @@ -1175,6 +1240,16 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) sk_reallocate(sk); } + /* For dynamic BGP, spawn new instance and postpone the socket */ + if (bgp_is_dynamic(p)) + { + p = bgp_spawn(p, sk->daddr); + p->postponed_sk = sk; + rmove(sk, p->p.pool); + return 0; + } + + rmove(sk, p->p.pool); bgp_setup_conn(p, &p->incoming_conn); bgp_setup_sk(&p->incoming_conn, sk); bgp_send_open(&p->incoming_conn); @@ -1201,11 +1276,11 @@ bgp_start_neighbor(struct bgp_proto *p) { /* Called only for single-hop BGP sessions */ - if (ipa_zero(p->source_addr)) - p->source_addr = p->neigh->ifa->ip; + if (ipa_zero(p->local_ip)) + p->local_ip = p->neigh->ifa->ip; - if (ipa_is_link_local(p->source_addr)) - p->link_addr = p->source_addr; + if (ipa_is_link_local(p->local_ip)) + p->link_addr = p->local_ip; else if (p->neigh->iface->llv6) p->link_addr = p->neigh->iface->llv6->ip; @@ -1293,8 +1368,8 @@ bgp_bfd_notify(struct bfd_request *req) static void bgp_update_bfd(struct bgp_proto *p, int use_bfd) { - if (use_bfd && !p->bfd_req) - p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr, + if (use_bfd && !p->bfd_req && !bgp_is_dynamic(p)) + p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip, p->cf->multihop ? NULL : p->neigh->iface, bgp_bfd_notify, p); @@ -1375,7 +1450,7 @@ static void bgp_start_locked(struct object_lock *lock) { struct bgp_proto *p = lock->data; - struct bgp_config *cf = p->cf; + const struct bgp_config *cf = p->cf; if (p->p.proto_state != PS_START) { @@ -1385,17 +1460,17 @@ bgp_start_locked(struct object_lock *lock) DBG("BGP: Got lock\n"); - if (cf->multihop) + if (cf->multihop || bgp_is_dynamic(p)) { /* Multi-hop sessions do not use neighbor entries */ bgp_initiate(p); return; } - neighbor *n = neigh_find(&p->p, cf->remote_ip, cf->iface, NEF_STICKY); + neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY); if (!n) { - log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface); + log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface); /* As we do not start yet, we can just disable protocol */ p->p.disabled = 1; bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); @@ -1406,7 +1481,7 @@ bgp_start_locked(struct object_lock *lock) p->neigh = n; if (n->scope <= 0) - BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface); + BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", p->remote_ip, cf->iface); else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP)) BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name); else @@ -1417,14 +1492,29 @@ static int bgp_start(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; - struct object_lock *lock; + const struct bgp_config *cf = p->cf; + + p->local_ip = cf->local_ip; + p->local_as = cf->local_as; + p->remote_as = cf->remote_as; + p->public_as = cf->local_as; + + /* For dynamic BGP childs, remote_ip is already set */ + if (ipa_nonzero(cf->remote_ip)) + p->remote_ip = cf->remote_ip; + + /* Confederation ID is used for truly external peers */ + if (p->cf->confederation && !p->is_interior) + p->public_as = cf->confederation; + + p->passive = cf->passive || bgp_is_dynamic(p); - DBG("BGP: Startup.\n"); p->start_state = BSS_PREPARE; p->outgoing_conn.state = BS_IDLE; p->incoming_conn.state = BS_IDLE; p->neigh = NULL; p->bfd_req = NULL; + p->postponed_sk = NULL; p->gr_ready = 0; p->gr_active_num = 0; @@ -1437,7 +1527,6 @@ bgp_start(struct proto *P) p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id; p->remote_id = 0; - p->source_addr = p->cf->local_ip; p->link_addr = IPA_NONE; /* Lock all channels when in GR recovery mode */ @@ -1452,9 +1541,9 @@ bgp_start(struct proto *P) * Before attempting to create the connection, we need to lock the port, * so that we are the only instance attempting to talk with that neighbor. */ - + struct object_lock *lock; lock = p->lock = olock_new(P->pool); - lock->addr = p->cf->remote_ip; + lock->addr = p->remote_ip; lock->port = p->cf->remote_port; lock->iface = p->cf->iface; lock->vrf = p->cf->iface ? NULL : p->p.vrf; @@ -1472,7 +1561,7 @@ static int bgp_shutdown(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; - uint subcode = 0; + int subcode = 0; char *message = NULL; byte *data = NULL; @@ -1493,6 +1582,7 @@ bgp_shutdown(struct proto *P) case PDC_CMD_DISABLE: case PDC_CMD_SHUTDOWN: + shutdown: subcode = 2; // Errcode 6, 2 - administrative shutdown message = P->message; break; @@ -1502,6 +1592,14 @@ bgp_shutdown(struct proto *P) message = P->message; break; + case PDC_CMD_GR_DOWN: + if ((p->cf->gr_mode != BGP_GR_ABLE) && + (p->cf->llgr_mode != BGP_LLGR_ABLE)) + goto shutdown; + + subcode = -1; // Do not send NOTIFICATION, just close the connection + break; + case PDC_RX_LIMIT_HIT: case PDC_IN_LIMIT_HIT: subcode = 1; // Errcode 6, 1 - max number of prefixes reached @@ -1528,7 +1626,7 @@ bgp_shutdown(struct proto *P) if (message) { uint msg_len = strlen(message); - msg_len = MIN(msg_len, 128); + msg_len = MIN(msg_len, 255); /* Buffer will be freed automatically by protocol shutdown */ data = mb_alloc(p->p.pool, msg_len + 1); @@ -1562,17 +1660,21 @@ bgp_init(struct proto_config *CF) P->rte_modify = bgp_rte_modify_stale; p->cf = cf; - p->local_as = cf->local_as; - p->remote_as = cf->remote_as; - p->public_as = cf->local_as; p->is_internal = (cf->local_as == cf->remote_as); p->is_interior = p->is_internal || cf->confederation_member; p->rs_client = cf->rs_client; p->rr_client = cf->rr_client; - /* Confederation ID is used for truly external peers */ - if (cf->confederation && !p->is_interior) - p->public_as = cf->confederation; + p->ipv4 = ipa_nonzero(cf->remote_ip) ? + ipa_is_ip4(cf->remote_ip) : + (cf->remote_range && (cf->remote_range->type == NET_IP4)); + + p->remote_ip = cf->remote_ip; + p->remote_as = cf->remote_as; + + /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */ + if (cf->c.parent) + cf->remote_ip = IPA_NONE; /* Add all channels */ struct bgp_channel_config *cc; @@ -1604,7 +1706,7 @@ bgp_channel_start(struct channel *C) { struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; - ip_addr src = p->source_addr; + ip_addr src = p->local_ip; if (c->igp_table_ip4) rt_lock_table(c->igp_table_ip4); @@ -1745,14 +1847,19 @@ void bgp_postconfig(struct proto_config *CF) { struct bgp_config *cf = (void *) CF; - int internal = (cf->local_as == cf->remote_as); - int interior = internal || cf->confederation_member; /* Do not check templates at all */ if (cf->c.class == SYM_TEMPLATE) return; + /* Handle undefined remote_as, zero should mean unspecified external */ + if (!cf->remote_as && (cf->peer_type == BGP_PT_INTERNAL)) + cf->remote_as = cf->local_as; + + int internal = (cf->local_as == cf->remote_as); + int interior = internal || cf->confederation_member; + /* EBGP direct by default, IBGP multihop by default */ if (cf->multihop < 0) cf->multihop = internal ? 64 : 0; @@ -1769,11 +1876,20 @@ bgp_postconfig(struct proto_config *CF) if (!cf->local_as) cf_error("Local AS number must be set"); - if (ipa_zero(cf->remote_ip)) + if (ipa_zero(cf->remote_ip) && !cf->remote_range) cf_error("Neighbor must be configured"); - if (!cf->remote_as) - cf_error("Remote AS number must be set"); + if (ipa_zero(cf->local_ip) && cf->strict_bind) + cf_error("Local address must be configured for strict bind"); + + if (!cf->remote_as && !cf->peer_type) + cf_error("Remote AS number (or peer type) must be set"); + + if ((cf->peer_type == BGP_PT_INTERNAL) && !internal) + cf_error("IBGP cannot have different ASNs"); + + if ((cf->peer_type == BGP_PT_EXTERNAL) && internal) + cf_error("EBGP cannot have the same ASNs"); if (!cf->iface && (ipa_is_link_local(cf->local_ip) || ipa_is_link_local(cf->remote_ip))) @@ -1885,8 +2001,8 @@ static int bgp_reconfigure(struct proto *P, struct proto_config *CF) { struct bgp_proto *p = (void *) P; - struct bgp_config *new = (void *) CF; - struct bgp_config *old = p->cf; + const struct bgp_config *new = (void *) CF; + const struct bgp_config *old = p->cf; if (proto_get_router_id(CF) != p->local_id) return 0; @@ -1896,7 +2012,12 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF) // password item is last and must be checked separately OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config)) && ((!old->password && !new->password) - || (old->password && new->password && !strcmp(old->password, new->password))); + || (old->password && new->password && !strcmp(old->password, new->password))) + && ((!old->remote_range && !new->remote_range) + || (old->remote_range && new->remote_range && net_equal(old->remote_range, new->remote_range))) + && ((!old->dynamic_name && !new->dynamic_name) + || (old->dynamic_name && new->dynamic_name && !strcmp(old->dynamic_name, new->dynamic_name))) + && (old->dynamic_name_digits == new->dynamic_name_digits); /* FIXME: Move channel reconfiguration to generic protocol code ? */ struct channel *C, *C2; @@ -1926,6 +2047,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF) if (same) p->cf = new; + /* Reset name counter */ + p->dynamic_name_counter = 0; + return same; } @@ -2056,7 +2180,7 @@ bgp_state_dsc(struct bgp_proto *p) return "Down"; int state = MAX(p->incoming_conn.state, p->outgoing_conn.state); - if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive) + if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->passive) return "Passive"; return bgp_state_names[state]; @@ -2232,8 +2356,14 @@ bgp_show_proto_info(struct proto *P) struct bgp_proto *p = (struct bgp_proto *) P; cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p)); - cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface); + + if (bgp_is_dynamic(p) && p->cf->remote_range) + cli_msg(-1006, " Neighbor range: %N", p->cf->remote_range); + else + cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface); + cli_msg(-1006, " Neighbor AS: %u", p->remote_as); + cli_msg(-1006, " Local AS: %u", p->cf->local_as); if (p->gr_active_num) cli_msg(-1006, " Neighbor graceful restart active"); @@ -2269,7 +2399,7 @@ bgp_show_proto_info(struct proto *P) p->rr_client ? " route-reflector" : "", p->rs_client ? " route-server" : "", p->as4_session ? " AS4" : ""); - cli_msg(-1006, " Source address: %I", p->source_addr); + cli_msg(-1006, " Source address: %I", p->local_ip); cli_msg(-1006, " Hold timer: %t/%u", tm_remains(p->conn->hold_timer), p->conn->hold_time); cli_msg(-1006, " Keepalive timer: %t/%u", diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index b604c7aa..075e1bb9 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -83,6 +83,7 @@ struct bgp_config { struct iface *iface; /* Interface for link-local addresses */ u16 local_port; /* Local listening port */ u16 remote_port; /* Neighbor destination port */ + int peer_type; /* Internal or external BGP (BGP_PT_*, optional) */ int multihop; /* Number of hops if multihop */ int strict_bind; /* Bind listening socket to local address */ int ttl_security; /* Enable TTL security [RFC 5082] */ @@ -123,6 +124,9 @@ struct bgp_config { u32 disable_after_cease; /* Disable it when cease is received, bitfield */ char *password; /* Password used for MD5 authentication */ + net_addr *remote_range; /* Allowed neighbor range for dynamic BGP */ + char *dynamic_name; /* Name pattern for dynamic BGP */ + int dynamic_name_digits; /* Minimum number of digits for dynamic names */ int check_link; /* Use iface link state for liveness detection */ int bfd; /* Use BFD for liveness detection */ }; @@ -136,6 +140,7 @@ struct bgp_channel_config { ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ u8 next_hop_self; /* Always set next hop to local IP address (NH_*) */ u8 next_hop_keep; /* Do not modify next hop attribute (NH_*) */ + u8 mandatory; /* Channel is mandatory in capability negotiation */ u8 missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */ u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */ @@ -151,6 +156,9 @@ struct bgp_channel_config { struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ }; +#define BGP_PT_INTERNAL 1 +#define BGP_PT_EXTERNAL 2 + #define NH_NO 0 #define NH_ALL 1 #define NH_IBGP 2 @@ -213,8 +221,11 @@ struct bgp_caps { u16 gr_time; /* Graceful restart time in seconds */ u8 llgr_aware; /* Long-lived GR capability, RFC draft */ + u8 any_ext_next_hop; /* Bitwise OR of per-AF ext_next_hop */ + u8 any_add_path; /* Bitwise OR of per-AF add_path */ u16 af_count; /* Number of af_data items */ + u16 length; /* Length of capabilities in OPEN msg */ struct bgp_af_caps af_data[0]; /* Per-AF capability data */ }; @@ -235,6 +246,7 @@ struct bgp_conn { u8 state; /* State of connection state machine */ u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u8 ext_messages; /* Session uses extended message length */ + u32 received_as; /* ASN received in OPEN message */ struct bgp_caps *local_caps; struct bgp_caps *remote_caps; @@ -254,18 +266,21 @@ struct bgp_conn { struct bgp_proto { struct proto p; - struct bgp_config *cf; /* Shortcut to BGP configuration */ + const struct bgp_config *cf; /* Shortcut to BGP configuration */ + ip_addr local_ip, remote_ip; u32 local_as, remote_as; u32 public_as; /* Externally visible ASN (local_as or confederation id) */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ u32 rr_cluster_id; /* Route reflector cluster ID */ - int start_state; /* Substates that partitions BS_START */ + u8 start_state; /* Substates that partitions BS_START */ u8 is_internal; /* Internal BGP session (local_as == remote_as) */ u8 is_interior; /* Internal or intra-confederation BGP session */ u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u8 rr_client; /* Whether neighbor is RR client of me */ u8 rs_client; /* Whether neighbor is RS client of me */ + u8 ipv4; /* Use IPv4 connection, i.e. remote_ip is IPv4 */ + u8 passive; /* Do not initiate outgoing connection */ u8 route_refresh; /* Route refresh allowed to send [RFC 2918] */ u8 enhanced_refresh; /* Enhanced refresh is negotiated [RFC 7313] */ u8 gr_ready; /* Neighbor could do graceful restart */ @@ -282,11 +297,12 @@ struct bgp_proto { struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ struct bgp_socket *sock; /* Shared listening socket */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ - ip_addr source_addr; /* Local address used as an advertised next hop */ - ip_addr link_addr; /* Link-local version of source_addr */ + struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */ + ip_addr link_addr; /* Link-local version of local_ip */ event *event; /* Event for respawning and shutting process */ timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */ + int dynamic_name_counter; /* Counter for dynamic BGP names */ uint startup_delay; /* Delay (in seconds) of protocol startup due to previous errors */ btime last_proto_error; /* Time of last error that leads to protocol stop */ u8 last_error_class; /* Error class of last error */ @@ -472,7 +488,7 @@ void bgp_graceful_restart_done(struct bgp_channel *c); void bgp_refresh_begin(struct bgp_channel *c); void bgp_refresh_end(struct bgp_channel *c); void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); -void bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len); +void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len); struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id); struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); @@ -549,6 +565,7 @@ void bgp_get_route_info(struct rte *, byte *buf); /* packets.c */ void bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new); +void bgp_prepare_capabilities(struct bgp_conn *conn); const struct bgp_af_desc *bgp_get_af_desc(u32 afi); const struct bgp_af_caps *bgp_find_af_caps(struct bgp_caps *caps, u32 afi); void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type); diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index ac8d024a..bbc7d9a4 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -29,7 +29,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG, - LIVED, STALE, IMPORT, IBGP, EBGP) + LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, + DYNAMIC, RANGE, NAME, DIGITS) %type <i> bgp_nh %type <i32> bgp_afi @@ -68,6 +69,7 @@ bgp_proto_start: proto_start BGP { BGP_CFG->llgr_mode = -1; BGP_CFG->llgr_time = 3600; BGP_CFG->setkey = 1; + BGP_CFG->dynamic_name = "dynbgp"; BGP_CFG->check_link = -1; } ; @@ -82,6 +84,8 @@ bgp_nbr_opts: /* empty */ | bgp_nbr_opts PORT expr { BGP_CFG->remote_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); } | bgp_nbr_opts AS expr { BGP_CFG->remote_as = $3; } + | bgp_nbr_opts INTERNAL { BGP_CFG->peer_type = BGP_PT_INTERNAL; } + | bgp_nbr_opts EXTERNAL { BGP_CFG->peer_type = BGP_PT_EXTERNAL; } ; bgp_cease_mask: @@ -118,11 +122,18 @@ bgp_proto: } | bgp_proto NEIGHBOR bgp_nbr_opts ';' | bgp_proto NEIGHBOR ipa ipa_scope bgp_nbr_opts ';' { - if (ipa_nonzero(BGP_CFG->remote_ip)) + if (ipa_nonzero(BGP_CFG->remote_ip) || BGP_CFG->remote_range) cf_error("Only one neighbor per BGP instance is allowed"); BGP_CFG->remote_ip = $3; if ($4) BGP_CFG->iface = $4; } + | bgp_proto NEIGHBOR RANGE net_ip bgp_nbr_opts ';' { + if (ipa_nonzero(BGP_CFG->remote_ip) || BGP_CFG->remote_range) + cf_error("Only one neighbor per BGP instance is allowed"); + net_addr *n = cfg_alloc($4.length); + net_copy(n, &($4)); + BGP_CFG->remote_range = n; + } | bgp_proto INTERFACE TEXT ';' { BGP_CFG->iface = if_get_by_name($3); } | bgp_proto RR CLUSTER ID idval ';' { BGP_CFG->rr_cluster_id = $5; } | bgp_proto RR CLIENT bool ';' { BGP_CFG->rr_client = $4; } @@ -134,6 +145,12 @@ bgp_proto: | bgp_proto DIRECT ';' { BGP_CFG->multihop = 0; } | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; } | bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); } + | bgp_proto DYNAMIC NAME text ';' { + if (strchr($4, '%')) cf_error("Forbidden character '%%' in dynamic name"); + if (strlen($4) > (SYM_MAX_LEN - 16)) cf_error("Dynamic name too long"); + BGP_CFG->dynamic_name = $4; + } + | bgp_proto DYNAMIC NAME DIGITS expr ';' { BGP_CFG->dynamic_name_digits = $5; if ($5>10) cf_error("Dynamic name digits must be at most 10"); } | bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; } | bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; } | bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; } @@ -223,6 +240,7 @@ bgp_channel_item: | NEXT HOP ADDRESS ipa { BGP_CC->next_hop_addr = $4; } | NEXT HOP SELF bgp_nh { BGP_CC->next_hop_self = $4; } | NEXT HOP KEEP bgp_nh { BGP_CC->next_hop_keep = $4; } + | MANDATORY bool { BGP_CC->mandatory = $2; } | MISSING LLADDR SELF { BGP_CC->missing_lladdr = MLL_SELF; } | MISSING LLADDR DROP { BGP_CC->missing_lladdr = MLL_DROP; } | MISSING LLADDR IGNORE { BGP_CC->missing_lladdr = MLL_IGNORE; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 2b5cc440..daa88630 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -100,7 +100,7 @@ init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d) d->peer_as = p->remote_as; d->local_as = p->local_as; d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0; - d->af = ipa_is_ip4(p->cf->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6; + d->af = ipa_is_ip4(p->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6; d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE; d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE; d->as4 = p_ok ? p->as4_session : 0; @@ -208,19 +208,22 @@ bgp_af_caps_cmp(const void *X, const void *Y) } -static byte * -bgp_write_capabilities(struct bgp_conn *conn, byte *buf) +void +bgp_prepare_capabilities(struct bgp_conn *conn) { struct bgp_proto *p = conn->bgp; struct bgp_channel *c; struct bgp_caps *caps; struct bgp_af_caps *ac; - uint any_ext_next_hop = 0; - uint any_add_path = 0; - byte *data; - /* Prepare bgp_caps structure */ + if (!p->cf->capabilities) + { + /* Just prepare empty local_caps */ + conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps)); + return; + } + /* Prepare bgp_caps structure */ int n = list_length(&p->p.channels); caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps)); conn->local_caps = caps; @@ -251,10 +254,10 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) ac->ready = 1; ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop; - any_ext_next_hop |= ac->ext_next_hop; + caps->any_ext_next_hop |= ac->ext_next_hop; ac->add_path = c->cf->add_path; - any_add_path |= ac->add_path; + caps->any_add_path |= ac->add_path; if (c->cf->gr_able) { @@ -276,7 +279,16 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) /* Sort capability fields by AFI/SAFI */ qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp); +} +static byte * +bgp_write_capabilities(struct bgp_conn *conn, byte *buf) +{ + struct bgp_proto *p = conn->bgp; + struct bgp_caps *caps = conn->local_caps; + struct bgp_af_caps *ac; + byte *buf_head = buf; + byte *data; /* Create capability list in buffer */ @@ -301,7 +313,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) *buf++ = 0; /* Capability data length */ } - if (any_ext_next_hop) + if (caps->any_ext_next_hop) { *buf++ = 5; /* Capability 5: Support for extended next hop */ *buf++ = 0; /* Capability data length, will be fixed later */ @@ -353,7 +365,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) buf += 4; } - if (any_add_path) + if (caps->any_add_path) { *buf++ = 69; /* Capability 69: Support for ADD-PATH */ *buf++ = 0; /* Capability data length, will be fixed later */ @@ -394,6 +406,8 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) data[-1] = buf - data; } + caps->length = buf - buf_head; + return buf; } @@ -405,6 +419,8 @@ bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, i int i, cl; u32 af; + caps->length += len; + while (len > 0) { if (len < 2 || len < (2 + pos[1])) @@ -569,6 +585,42 @@ err: } static int +bgp_check_capabilities(struct bgp_conn *conn) +{ + struct bgp_proto *p = conn->bgp; + struct bgp_caps *local = conn->local_caps; + struct bgp_caps *remote = conn->remote_caps; + struct bgp_channel *c; + int count = 0; + + /* This is partially overlapping with bgp_conn_enter_established_state(), + but we need to run this just after we receive OPEN message */ + + WALK_LIST(c, p->p.channels) + { + const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); + const struct bgp_af_caps *rem = bgp_find_af_caps(remote, c->afi); + + /* Find out whether this channel will be active */ + int active = loc && loc->ready && + ((rem && rem->ready) || (!remote->length && (c->afi == BGP_AF_IPV4))); + + /* Mandatory must be active */ + if (c->cf->mandatory && !active) + return 0; + + if (active) + count++; + } + + /* We need at least one channel active */ + if (!count) + return 0; + + return 1; +} + +static int bgp_read_options(struct bgp_conn *conn, byte *pos, int len) { struct bgp_proto *p = conn->bgp; @@ -635,9 +687,6 @@ bgp_create_open(struct bgp_conn *conn, byte *buf) } else { - /* Prepare empty local_caps */ - conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps)); - buf[9] = 0; /* No optional parameters */ return buf + 10; } @@ -678,6 +727,10 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) if (!id || (p->is_internal && id == p->local_id)) { bgp_error(conn, 2, 3, pkt+24, -4); return; } + /* RFC 5492 4 - check for required capabilities */ + if (p->cf->capabilities && !bgp_check_capabilities(conn)) + { bgp_error(conn, 2, 7, NULL, 0); return; } + struct bgp_caps *caps = conn->remote_caps; if (caps->as4_support) @@ -687,13 +740,18 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) if ((as4 != asn) && (asn != AS_TRANS)) log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name); - if (as4 != p->remote_as) + /* When remote ASN is unspecified, it must be external one */ + if (p->remote_as ? (as4 != p->remote_as) : (as4 == p->local_as)) { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; } + + conn->received_as = as4; } else { - if (asn != p->remote_as) + if (p->remote_as ? (asn != p->remote_as) : (asn == p->local_as)) { bgp_error(conn, 2, 2, pkt+20, 2); return; } + + conn->received_as = asn; } /* Check the other connection */ @@ -962,7 +1020,7 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) WITHDRAW(NO_NEXT_HOP); ip_addr *nh = (void *) a->u.ptr->data; - ip_addr peer = s->proto->cf->remote_ip; + ip_addr peer = s->proto->remote_ip; uint len = a->u.ptr->length; /* Forbid zero next hop */ @@ -2280,7 +2338,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis a->source = RTS_BGP; a->scope = SCOPE_UNIVERSE; - a->from = s->proto->cf->remote_ip; + a->from = s->proto->remote_ip; a->eattrs = ea; c->desc->decode_next_hop(s, nh, nh_len, a); @@ -2634,6 +2692,12 @@ bgp_fire_tx(struct bgp_conn *conn) end = bgp_create_notification(conn, pkt); return bgp_send(conn, PKT_NOTIFICATION, end - buf); } + else if (s & (1 << PKT_OPEN)) + { + conn->packets_to_send &= ~(1 << PKT_OPEN); + end = bgp_create_open(conn, pkt); + return bgp_send(conn, PKT_OPEN, end - buf); + } else if (s & (1 << PKT_KEEPALIVE)) { conn->packets_to_send &= ~(1 << PKT_KEEPALIVE); @@ -2641,12 +2705,6 @@ bgp_fire_tx(struct bgp_conn *conn) bgp_start_timer(conn->keepalive_timer, conn->keepalive_time); return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH); } - else if (s & (1 << PKT_OPEN)) - { - conn->packets_to_send &= ~(1 << PKT_OPEN); - end = bgp_create_open(conn, pkt); - return bgp_send(conn, PKT_OPEN, end - buf); - } else while (conn->channels_to_send) { c = bgp_get_channel_to_send(p, conn); @@ -2731,15 +2789,18 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev)) ev_schedule(conn->tx_ev); } - void bgp_kick_tx(void *vconn) { struct bgp_conn *conn = vconn; DBG("BGP: kicking TX\n"); - while (bgp_fire_tx(conn) > 0) + uint max = 1024; + while (--max && (bgp_fire_tx(conn) > 0)) ; + + if (!max && !ev_active(conn->tx_ev)) + ev_schedule(conn->tx_ev); } void @@ -2748,8 +2809,12 @@ bgp_tx(sock *sk) struct bgp_conn *conn = sk->data; DBG("BGP: TX hook\n"); - while (bgp_fire_tx(conn) > 0) + uint max = 1024; + while (--max && (bgp_fire_tx(conn) > 0)) ; + + if (!max && !ev_active(conn->tx_ev)) + ev_schedule(conn->tx_ev); } @@ -2835,7 +2900,7 @@ bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp) return 1; /* Handle proper message */ - if ((msg_len > 128) && (msg_len + 1 > len)) + if ((msg_len > 255) && (msg_len + 1 > len)) return 0; /* Some elementary cleanup */ |