summary | refs | log | tree | commit | diff
path: root/proto/bgp
diff options
context:
space:
mode:
author: Maria Matejka <mq@ucw.cz>, 2019-07-03 11:09:52 +0200
committer: Maria Matejka <mq@ucw.cz>, 2019-07-03 11:12:25 +0200
commit: eac9250fd5b10809830361b94438339b3b31b270 (patch)
tree: 5c9ec2591f0baa462f5572f83e4c452c3a166c95 /proto/bgp
parent: 8816b6cdd98d24535eece6b5e35730aac57cd9f7 (diff)
parent: 026bfedb332d8c0dde28c693c177fe993b5df26d (diff)
Merge branch 'master' into mq-filter-stack
Diffstat (limited to 'proto/bgp')
-rw-r--r-- proto/bgp/attrs.c 4
-rw-r--r-- proto/bgp/bgp.c 232
-rw-r--r-- proto/bgp/bgp.h 27
-rw-r--r-- proto/bgp/config.Y 22
-rw-r--r-- proto/bgp/packets.c 121
5 files changed, 318 insertions, 88 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 7c6f2ee9..69c4b172 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -1302,7 +1302,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
#define PXH_FN(n,i,h) h
#define PXH_REHASH bgp_pxh_rehash
-#define PXH_PARAMS /8, *2, 2, 2, 8, 20
+#define PXH_PARAMS /8, *2, 2, 2, 8, 24
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
@@ -1730,7 +1730,7 @@ bgp_rte_better(rte *new, rte *old)
return 0;
/* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
- return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
+ return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0;
}
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index 8dedde9f..b68575a5 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -129,6 +129,9 @@ static list bgp_sockets; /* Global list of listening sockets */
static void bgp_connect(struct bgp_proto *p);
static void bgp_active(struct bgp_proto *p);
+static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn);
+static void bgp_setup_sk(struct bgp_conn *conn, sock *s);
+static void bgp_send_open(struct bgp_conn *conn);
static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
@@ -149,7 +152,7 @@ bgp_open(struct bgp_proto *p)
struct bgp_socket *bs = NULL;
struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
- (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6);
+ (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
uint port = p->cf->local_port;
/* FIXME: Add some global init? */
@@ -272,8 +275,17 @@ bgp_startup(struct bgp_proto *p)
BGP_TRACE(D_EVENTS, "Started");
p->start_state = BSS_CONNECT;
- if (!p->cf->passive)
+ if (!p->passive)
bgp_active(p);
+
+ if (p->postponed_sk)
+ {
+ /* Apply postponed incoming connection */
+ bgp_setup_conn(p, &p->incoming_conn);
+ bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
+ bgp_send_open(&p->incoming_conn);
+ p->postponed_sk = NULL;
+ }
}
static void
@@ -387,7 +399,7 @@ bgp_close_conn(struct bgp_conn *conn)
void
bgp_update_startup_delay(struct bgp_proto *p)
{
- struct bgp_config *cf = p->cf;
+ const struct bgp_config *cf = p->cf;
DBG("BGP: Updating startup delay\n");
@@ -410,7 +422,7 @@ bgp_update_startup_delay(struct bgp_proto *p)
}
static void
-bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len)
+bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len)
{
switch (conn->state)
{
@@ -426,7 +438,13 @@ bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint le
case BS_OPENSENT:
case BS_OPENCONFIRM:
case BS_ESTABLISHED:
- bgp_error(conn, 6, subcode, data, len);
+ if (subcode < 0)
+ {
+ bgp_conn_enter_close_state(conn);
+ bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
+ }
+ else
+ bgp_error(conn, 6, subcode, data, len);
return;
default:
@@ -456,7 +474,7 @@ bgp_decision(void *vp)
if ((p->p.proto_state == PS_START) &&
(p->outgoing_conn.state == BS_IDLE) &&
(p->incoming_conn.state != BS_OPENCONFIRM) &&
- !p->cf->passive)
+ !p->passive)
bgp_active(p);
if ((p->p.proto_state == PS_STOP) &&
@@ -465,8 +483,31 @@ bgp_decision(void *vp)
bgp_down(p);
}
+static struct bgp_proto *
+bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip) /* Clone the config of pp under a generated name and spawn a new instance bound to remote_ip */
+{
+ struct symbol *sym;
+ char fmt[SYM_MAX_LEN];
+
+ bsprintf(fmt, "%s%%0%dd", pp->cf->dynamic_name, pp->cf->dynamic_name_digits); /* Name pattern: configured prefix followed by a zero-padded number conversion */
+
+ /* This is a hack: we would like to share the config, but for now we have to
+    copy it, so the config-parser globals are pointed at the running config */
+ new_config = config;
+ cfg_mem = config->mem;
+ conf_this_scope = config->root_scope;
+ sym = cf_default_name(fmt, &(pp->dynamic_name_counter)); /* Counter lives in the parent, so it is shared by all instances spawned from pp */
+ proto_clone_config(sym, pp->p.cf);
+ new_config = NULL;
+ cfg_mem = NULL; /* NOTE(review): conf_this_scope is not restored here - confirm that is intended */
+
+ /* Just pass remote_ip to bgp_init() through the cloned config */
+ ((struct bgp_config *) sym->proto)->remote_ip = remote_ip;
+
+ return (void *) proto_spawn(sym->proto, 0);
+}
+
void
-bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len)
+bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
{
proto_notify_state(&p->p, PS_STOP);
bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
@@ -491,6 +532,7 @@ bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
}
static const struct bgp_af_caps dummy_af_caps = { };
+static const struct bgp_af_caps basic_af_caps = { .ready = 1 };
void
bgp_conn_enter_established_state(struct bgp_conn *conn)
@@ -503,8 +545,12 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
BGP_TRACE(D_EVENTS, "BGP session established");
/* For multi-hop BGP sessions */
- if (ipa_zero(p->source_addr))
- p->source_addr = conn->sk->saddr;
+ if (ipa_zero(p->local_ip))
+ p->local_ip = conn->sk->saddr;
+
+ /* For promiscuous sessions */
+ if (!p->remote_as)
+ p->remote_as = conn->received_as;
/* In case of LLv6 is not valid during BGP start */
if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6)
@@ -541,6 +587,13 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
+ /* Use default if capabilities were not announced */
+ if (!local->length && (c->afi == BGP_AF_IPV4))
+ loc = &basic_af_caps;
+
+ if (!peer->length && (c->afi == BGP_AF_IPV4))
+ rem = &basic_af_caps;
+
/* Ignore AFIs that were not announced in multiprotocol capability */
if (!loc || !loc->ready)
loc = &dummy_af_caps;
@@ -880,6 +933,7 @@ bgp_send_open(struct bgp_conn *conn)
conn->sk->rx_hook = bgp_rx;
conn->sk->tx_hook = bgp_tx;
tm_stop(conn->connect_timer);
+ bgp_prepare_capabilities(conn);
bgp_schedule_packet(conn, NULL, PKT_OPEN);
bgp_conn_set_state(conn, BS_OPENSENT);
bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
@@ -1039,8 +1093,8 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
DBG("BGP: Connecting\n");
sock *s = sk_new(p->p.pool);
s->type = SK_TCP_ACTIVE;
- s->saddr = p->source_addr;
- s->daddr = p->cf->remote_ip;
+ s->saddr = p->local_ip;
+ s->daddr = p->remote_ip;
s->dport = p->cf->remote_port;
s->iface = p->neigh ? p->neigh->iface : NULL;
s->vrf = p->p.vrf;
@@ -1075,6 +1129,9 @@ err:
return;
}
+static inline int bgp_is_dynamic(struct bgp_proto *p) /* Nonzero when p has no remote_ip set; such instances are handled as dynamic BGP in this file */
+{ return ipa_zero(p->remote_ip); }
+
/**
* bgp_find_proto - find existing proto for incoming connection
* @sk: TCP socket
@@ -1083,6 +1140,7 @@ err:
static struct bgp_proto *
bgp_find_proto(sock *sk)
{
+ struct bgp_proto *best = NULL;
struct bgp_proto *p;
/* sk->iface is valid only if src or dst address is link-local */
@@ -1090,13 +1148,20 @@ bgp_find_proto(sock *sk)
WALK_LIST(p, proto_list)
if ((p->p.proto == &proto_bgp) &&
- (p->sock == sk->data) &&
- ipa_equal(p->cf->remote_ip, sk->daddr) &&
+ (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) &&
+ (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) &&
+ (p->p.vrf == sk->vrf) &&
+ (p->cf->local_port == sk->sport) &&
(!link || (p->cf->iface == sk->iface)) &&
(ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)))
- return p;
+ {
+ best = p;
- return NULL;
+ if (!bgp_is_dynamic(p))
+ break;
+ }
+
+ return best;
}
/**
@@ -1175,6 +1240,16 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
sk_reallocate(sk);
}
+ /* For dynamic BGP, spawn new instance and postpone the socket */
+ if (bgp_is_dynamic(p))
+ {
+ p = bgp_spawn(p, sk->daddr);
+ p->postponed_sk = sk;
+ rmove(sk, p->p.pool);
+ return 0;
+ }
+
+ rmove(sk, p->p.pool);
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, sk);
bgp_send_open(&p->incoming_conn);
@@ -1201,11 +1276,11 @@ bgp_start_neighbor(struct bgp_proto *p)
{
/* Called only for single-hop BGP sessions */
- if (ipa_zero(p->source_addr))
- p->source_addr = p->neigh->ifa->ip;
+ if (ipa_zero(p->local_ip))
+ p->local_ip = p->neigh->ifa->ip;
- if (ipa_is_link_local(p->source_addr))
- p->link_addr = p->source_addr;
+ if (ipa_is_link_local(p->local_ip))
+ p->link_addr = p->local_ip;
else if (p->neigh->iface->llv6)
p->link_addr = p->neigh->iface->llv6->ip;
@@ -1293,8 +1368,8 @@ bgp_bfd_notify(struct bfd_request *req)
static void
bgp_update_bfd(struct bgp_proto *p, int use_bfd)
{
- if (use_bfd && !p->bfd_req)
- p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
+ if (use_bfd && !p->bfd_req && !bgp_is_dynamic(p))
+ p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip,
p->cf->multihop ? NULL : p->neigh->iface,
bgp_bfd_notify, p);
@@ -1375,7 +1450,7 @@ static void
bgp_start_locked(struct object_lock *lock)
{
struct bgp_proto *p = lock->data;
- struct bgp_config *cf = p->cf;
+ const struct bgp_config *cf = p->cf;
if (p->p.proto_state != PS_START)
{
@@ -1385,17 +1460,17 @@ bgp_start_locked(struct object_lock *lock)
DBG("BGP: Got lock\n");
- if (cf->multihop)
+ if (cf->multihop || bgp_is_dynamic(p))
{
/* Multi-hop sessions do not use neighbor entries */
bgp_initiate(p);
return;
}
- neighbor *n = neigh_find(&p->p, cf->remote_ip, cf->iface, NEF_STICKY);
+ neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY);
if (!n)
{
- log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
+ log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface);
/* As we do not start yet, we can just disable protocol */
p->p.disabled = 1;
bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
@@ -1406,7 +1481,7 @@ bgp_start_locked(struct object_lock *lock)
p->neigh = n;
if (n->scope <= 0)
- BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
+ BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", p->remote_ip, cf->iface);
else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
else
@@ -1417,14 +1492,29 @@ static int
bgp_start(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
- struct object_lock *lock;
+ const struct bgp_config *cf = p->cf;
+
+ p->local_ip = cf->local_ip;
+ p->local_as = cf->local_as;
+ p->remote_as = cf->remote_as;
+ p->public_as = cf->local_as;
+
+ /* For dynamic BGP children, remote_ip is already set */
+ if (ipa_nonzero(cf->remote_ip))
+ p->remote_ip = cf->remote_ip;
+
+ /* Confederation ID is used for truly external peers */
+ if (p->cf->confederation && !p->is_interior)
+ p->public_as = cf->confederation;
+
+ p->passive = cf->passive || bgp_is_dynamic(p);
- DBG("BGP: Startup.\n");
p->start_state = BSS_PREPARE;
p->outgoing_conn.state = BS_IDLE;
p->incoming_conn.state = BS_IDLE;
p->neigh = NULL;
p->bfd_req = NULL;
+ p->postponed_sk = NULL;
p->gr_ready = 0;
p->gr_active_num = 0;
@@ -1437,7 +1527,6 @@ bgp_start(struct proto *P)
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
p->remote_id = 0;
- p->source_addr = p->cf->local_ip;
p->link_addr = IPA_NONE;
/* Lock all channels when in GR recovery mode */
@@ -1452,9 +1541,9 @@ bgp_start(struct proto *P)
* Before attempting to create the connection, we need to lock the port,
* so that we are the only instance attempting to talk with that neighbor.
*/
-
+ struct object_lock *lock;
lock = p->lock = olock_new(P->pool);
- lock->addr = p->cf->remote_ip;
+ lock->addr = p->remote_ip;
lock->port = p->cf->remote_port;
lock->iface = p->cf->iface;
lock->vrf = p->cf->iface ? NULL : p->p.vrf;
@@ -1472,7 +1561,7 @@ static int
bgp_shutdown(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
- uint subcode = 0;
+ int subcode = 0;
char *message = NULL;
byte *data = NULL;
@@ -1493,6 +1582,7 @@ bgp_shutdown(struct proto *P)
case PDC_CMD_DISABLE:
case PDC_CMD_SHUTDOWN:
+ shutdown:
subcode = 2; // Errcode 6, 2 - administrative shutdown
message = P->message;
break;
@@ -1502,6 +1592,14 @@ bgp_shutdown(struct proto *P)
message = P->message;
break;
+ case PDC_CMD_GR_DOWN:
+ if ((p->cf->gr_mode != BGP_GR_ABLE) &&
+ (p->cf->llgr_mode != BGP_LLGR_ABLE))
+ goto shutdown;
+
+ subcode = -1; // Do not send NOTIFICATION, just close the connection
+ break;
+
case PDC_RX_LIMIT_HIT:
case PDC_IN_LIMIT_HIT:
subcode = 1; // Errcode 6, 1 - max number of prefixes reached
@@ -1528,7 +1626,7 @@ bgp_shutdown(struct proto *P)
if (message)
{
uint msg_len = strlen(message);
- msg_len = MIN(msg_len, 128);
+ msg_len = MIN(msg_len, 255);
/* Buffer will be freed automatically by protocol shutdown */
data = mb_alloc(p->p.pool, msg_len + 1);
@@ -1562,17 +1660,21 @@ bgp_init(struct proto_config *CF)
P->rte_modify = bgp_rte_modify_stale;
p->cf = cf;
- p->local_as = cf->local_as;
- p->remote_as = cf->remote_as;
- p->public_as = cf->local_as;
p->is_internal = (cf->local_as == cf->remote_as);
p->is_interior = p->is_internal || cf->confederation_member;
p->rs_client = cf->rs_client;
p->rr_client = cf->rr_client;
- /* Confederation ID is used for truly external peers */
- if (cf->confederation && !p->is_interior)
- p->public_as = cf->confederation;
+ p->ipv4 = ipa_nonzero(cf->remote_ip) ?
+ ipa_is_ip4(cf->remote_ip) :
+ (cf->remote_range && (cf->remote_range->type == NET_IP4));
+
+ p->remote_ip = cf->remote_ip;
+ p->remote_as = cf->remote_as;
+
+ /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
+ if (cf->c.parent)
+ cf->remote_ip = IPA_NONE;
/* Add all channels */
struct bgp_channel_config *cc;
@@ -1604,7 +1706,7 @@ bgp_channel_start(struct channel *C)
{
struct bgp_proto *p = (void *) C->proto;
struct bgp_channel *c = (void *) C;
- ip_addr src = p->source_addr;
+ ip_addr src = p->local_ip;
if (c->igp_table_ip4)
rt_lock_table(c->igp_table_ip4);
@@ -1745,14 +1847,19 @@ void
bgp_postconfig(struct proto_config *CF)
{
struct bgp_config *cf = (void *) CF;
- int internal = (cf->local_as == cf->remote_as);
- int interior = internal || cf->confederation_member;
/* Do not check templates at all */
if (cf->c.class == SYM_TEMPLATE)
return;
+ /* Handle undefined remote_as, zero should mean unspecified external */
+ if (!cf->remote_as && (cf->peer_type == BGP_PT_INTERNAL))
+ cf->remote_as = cf->local_as;
+
+ int internal = (cf->local_as == cf->remote_as);
+ int interior = internal || cf->confederation_member;
+
/* EBGP direct by default, IBGP multihop by default */
if (cf->multihop < 0)
cf->multihop = internal ? 64 : 0;
@@ -1769,11 +1876,20 @@ bgp_postconfig(struct proto_config *CF)
if (!cf->local_as)
cf_error("Local AS number must be set");
- if (ipa_zero(cf->remote_ip))
+ if (ipa_zero(cf->remote_ip) && !cf->remote_range)
cf_error("Neighbor must be configured");
- if (!cf->remote_as)
- cf_error("Remote AS number must be set");
+ if (ipa_zero(cf->local_ip) && cf->strict_bind)
+ cf_error("Local address must be configured for strict bind");
+
+ if (!cf->remote_as && !cf->peer_type)
+ cf_error("Remote AS number (or peer type) must be set");
+
+ if ((cf->peer_type == BGP_PT_INTERNAL) && !internal)
+ cf_error("IBGP cannot have different ASNs");
+
+ if ((cf->peer_type == BGP_PT_EXTERNAL) && internal)
+ cf_error("EBGP cannot have the same ASNs");
if (!cf->iface && (ipa_is_link_local(cf->local_ip) ||
ipa_is_link_local(cf->remote_ip)))
@@ -1885,8 +2001,8 @@ static int
bgp_reconfigure(struct proto *P, struct proto_config *CF)
{
struct bgp_proto *p = (void *) P;
- struct bgp_config *new = (void *) CF;
- struct bgp_config *old = p->cf;
+ const struct bgp_config *new = (void *) CF;
+ const struct bgp_config *old = p->cf;
if (proto_get_router_id(CF) != p->local_id)
return 0;
@@ -1896,7 +2012,12 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF)
// password item is last and must be checked separately
OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
&& ((!old->password && !new->password)
- || (old->password && new->password && !strcmp(old->password, new->password)));
+ || (old->password && new->password && !strcmp(old->password, new->password)))
+ && ((!old->remote_range && !new->remote_range)
+ || (old->remote_range && new->remote_range && net_equal(old->remote_range, new->remote_range)))
+ && ((!old->dynamic_name && !new->dynamic_name)
+ || (old->dynamic_name && new->dynamic_name && !strcmp(old->dynamic_name, new->dynamic_name)))
+ && (old->dynamic_name_digits == new->dynamic_name_digits);
/* FIXME: Move channel reconfiguration to generic protocol code ? */
struct channel *C, *C2;
@@ -1926,6 +2047,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF)
if (same)
p->cf = new;
+ /* Reset name counter */
+ p->dynamic_name_counter = 0;
+
return same;
}
@@ -2056,7 +2180,7 @@ bgp_state_dsc(struct bgp_proto *p)
return "Down";
int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
- if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
+ if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->passive)
return "Passive";
return bgp_state_names[state];
@@ -2232,8 +2356,14 @@ bgp_show_proto_info(struct proto *P)
struct bgp_proto *p = (struct bgp_proto *) P;
cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
- cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
+
+ if (bgp_is_dynamic(p) && p->cf->remote_range)
+ cli_msg(-1006, " Neighbor range: %N", p->cf->remote_range);
+ else
+ cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface);
+
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
+ cli_msg(-1006, " Local AS: %u", p->cf->local_as);
if (p->gr_active_num)
cli_msg(-1006, " Neighbor graceful restart active");
@@ -2269,7 +2399,7 @@ bgp_show_proto_info(struct proto *P)
p->rr_client ? " route-reflector" : "",
p->rs_client ? " route-server" : "",
p->as4_session ? " AS4" : "");
- cli_msg(-1006, " Source address: %I", p->source_addr);
+ cli_msg(-1006, " Source address: %I", p->local_ip);
cli_msg(-1006, " Hold timer: %t/%u",
tm_remains(p->conn->hold_timer), p->conn->hold_time);
cli_msg(-1006, " Keepalive timer: %t/%u",
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index b604c7aa..075e1bb9 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -83,6 +83,7 @@ struct bgp_config {
struct iface *iface; /* Interface for link-local addresses */
u16 local_port; /* Local listening port */
u16 remote_port; /* Neighbor destination port */
+ int peer_type; /* Internal or external BGP (BGP_PT_*, optional) */
int multihop; /* Number of hops if multihop */
int strict_bind; /* Bind listening socket to local address */
int ttl_security; /* Enable TTL security [RFC 5082] */
@@ -123,6 +124,9 @@ struct bgp_config {
u32 disable_after_cease; /* Disable it when cease is received, bitfield */
char *password; /* Password used for MD5 authentication */
+ net_addr *remote_range; /* Allowed neighbor range for dynamic BGP */
+ char *dynamic_name; /* Name pattern for dynamic BGP */
+ int dynamic_name_digits; /* Minimum number of digits for dynamic names */
int check_link; /* Use iface link state for liveness detection */
int bfd; /* Use BFD for liveness detection */
};
@@ -136,6 +140,7 @@ struct bgp_channel_config {
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
u8 next_hop_self; /* Always set next hop to local IP address (NH_*) */
u8 next_hop_keep; /* Do not modify next hop attribute (NH_*) */
+ u8 mandatory; /* Channel is mandatory in capability negotiation */
u8 missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */
u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */
u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
@@ -151,6 +156,9 @@ struct bgp_channel_config {
struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
};
+#define BGP_PT_INTERNAL 1
+#define BGP_PT_EXTERNAL 2
+
#define NH_NO 0
#define NH_ALL 1
#define NH_IBGP 2
@@ -213,8 +221,11 @@ struct bgp_caps {
u16 gr_time; /* Graceful restart time in seconds */
u8 llgr_aware; /* Long-lived GR capability, RFC draft */
+ u8 any_ext_next_hop; /* Bitwise OR of per-AF ext_next_hop */
+ u8 any_add_path; /* Bitwise OR of per-AF add_path */
u16 af_count; /* Number of af_data items */
+ u16 length; /* Length of capabilities in OPEN msg */
struct bgp_af_caps af_data[0]; /* Per-AF capability data */
};
@@ -235,6 +246,7 @@ struct bgp_conn {
u8 state; /* State of connection state machine */
u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
u8 ext_messages; /* Session uses extended message length */
+ u32 received_as; /* ASN received in OPEN message */
struct bgp_caps *local_caps;
struct bgp_caps *remote_caps;
@@ -254,18 +266,21 @@ struct bgp_conn {
struct bgp_proto {
struct proto p;
- struct bgp_config *cf; /* Shortcut to BGP configuration */
+ const struct bgp_config *cf; /* Shortcut to BGP configuration */
+ ip_addr local_ip, remote_ip;
u32 local_as, remote_as;
u32 public_as; /* Externally visible ASN (local_as or confederation id) */
u32 local_id; /* BGP identifier of this router */
u32 remote_id; /* BGP identifier of the neighbor */
u32 rr_cluster_id; /* Route reflector cluster ID */
- int start_state; /* Substates that partitions BS_START */
+ u8 start_state; /* Substates that partitions BS_START */
u8 is_internal; /* Internal BGP session (local_as == remote_as) */
u8 is_interior; /* Internal or intra-confederation BGP session */
u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
u8 rr_client; /* Whether neighbor is RR client of me */
u8 rs_client; /* Whether neighbor is RS client of me */
+ u8 ipv4; /* Use IPv4 connection, i.e. remote_ip is IPv4 */
+ u8 passive; /* Do not initiate outgoing connection */
u8 route_refresh; /* Route refresh allowed to send [RFC 2918] */
u8 enhanced_refresh; /* Enhanced refresh is negotiated [RFC 7313] */
u8 gr_ready; /* Neighbor could do graceful restart */
@@ -282,11 +297,12 @@ struct bgp_proto {
struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */
struct bgp_socket *sock; /* Shared listening socket */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
- ip_addr source_addr; /* Local address used as an advertised next hop */
- ip_addr link_addr; /* Link-local version of source_addr */
+ struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */
+ ip_addr link_addr; /* Link-local version of local_ip */
event *event; /* Event for respawning and shutting process */
timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */
+ int dynamic_name_counter; /* Counter for dynamic BGP names */
uint startup_delay; /* Delay (in seconds) of protocol startup due to previous errors */
btime last_proto_error; /* Time of last error that leads to protocol stop */
u8 last_error_class; /* Error class of last error */
@@ -472,7 +488,7 @@ void bgp_graceful_restart_done(struct bgp_channel *c);
void bgp_refresh_begin(struct bgp_channel *c);
void bgp_refresh_end(struct bgp_channel *c);
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
-void bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len);
+void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len);
struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
@@ -549,6 +565,7 @@ void bgp_get_route_info(struct rte *, byte *buf);
/* packets.c */
void bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new);
+void bgp_prepare_capabilities(struct bgp_conn *conn);
const struct bgp_af_desc *bgp_get_af_desc(u32 afi);
const struct bgp_af_caps *bgp_find_af_caps(struct bgp_caps *caps, u32 afi);
void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type);
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index ac8d024a..bbc7d9a4 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -29,7 +29,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX,
GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY,
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
- LIVED, STALE, IMPORT, IBGP, EBGP)
+ LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL,
+ DYNAMIC, RANGE, NAME, DIGITS)
%type <i> bgp_nh
%type <i32> bgp_afi
@@ -68,6 +69,7 @@ bgp_proto_start: proto_start BGP {
BGP_CFG->llgr_mode = -1;
BGP_CFG->llgr_time = 3600;
BGP_CFG->setkey = 1;
+ BGP_CFG->dynamic_name = "dynbgp";
BGP_CFG->check_link = -1;
}
;
@@ -82,6 +84,8 @@ bgp_nbr_opts:
/* empty */
| bgp_nbr_opts PORT expr { BGP_CFG->remote_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); }
| bgp_nbr_opts AS expr { BGP_CFG->remote_as = $3; }
+ | bgp_nbr_opts INTERNAL { BGP_CFG->peer_type = BGP_PT_INTERNAL; }
+ | bgp_nbr_opts EXTERNAL { BGP_CFG->peer_type = BGP_PT_EXTERNAL; }
;
bgp_cease_mask:
@@ -118,11 +122,18 @@ bgp_proto:
}
| bgp_proto NEIGHBOR bgp_nbr_opts ';'
| bgp_proto NEIGHBOR ipa ipa_scope bgp_nbr_opts ';' {
- if (ipa_nonzero(BGP_CFG->remote_ip))
+ if (ipa_nonzero(BGP_CFG->remote_ip) || BGP_CFG->remote_range)
cf_error("Only one neighbor per BGP instance is allowed");
BGP_CFG->remote_ip = $3;
if ($4) BGP_CFG->iface = $4;
}
+ | bgp_proto NEIGHBOR RANGE net_ip bgp_nbr_opts ';' {
+ if (ipa_nonzero(BGP_CFG->remote_ip) || BGP_CFG->remote_range)
+ cf_error("Only one neighbor per BGP instance is allowed");
+ net_addr *n = cfg_alloc($4.length);
+ net_copy(n, &($4));
+ BGP_CFG->remote_range = n;
+ }
| bgp_proto INTERFACE TEXT ';' { BGP_CFG->iface = if_get_by_name($3); }
| bgp_proto RR CLUSTER ID idval ';' { BGP_CFG->rr_cluster_id = $5; }
| bgp_proto RR CLIENT bool ';' { BGP_CFG->rr_client = $4; }
@@ -134,6 +145,12 @@ bgp_proto:
| bgp_proto DIRECT ';' { BGP_CFG->multihop = 0; }
| bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; }
| bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); }
+ | bgp_proto DYNAMIC NAME text ';' {
+ if (strchr($4, '%')) cf_error("Forbidden character '%%' in dynamic name");
+ if (strlen($4) > (SYM_MAX_LEN - 16)) cf_error("Dynamic name too long");
+ BGP_CFG->dynamic_name = $4;
+ }
+ | bgp_proto DYNAMIC NAME DIGITS expr ';' { BGP_CFG->dynamic_name_digits = $5; if ($5>10) cf_error("Dynamic name digits must be at most 10"); }
| bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; }
| bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; }
| bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; }
@@ -223,6 +240,7 @@ bgp_channel_item:
| NEXT HOP ADDRESS ipa { BGP_CC->next_hop_addr = $4; }
| NEXT HOP SELF bgp_nh { BGP_CC->next_hop_self = $4; }
| NEXT HOP KEEP bgp_nh { BGP_CC->next_hop_keep = $4; }
+ | MANDATORY bool { BGP_CC->mandatory = $2; }
| MISSING LLADDR SELF { BGP_CC->missing_lladdr = MLL_SELF; }
| MISSING LLADDR DROP { BGP_CC->missing_lladdr = MLL_DROP; }
| MISSING LLADDR IGNORE { BGP_CC->missing_lladdr = MLL_IGNORE; }
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 2b5cc440..daa88630 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -100,7 +100,7 @@ init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d)
d->peer_as = p->remote_as;
d->local_as = p->local_as;
d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0;
- d->af = ipa_is_ip4(p->cf->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6;
+ d->af = ipa_is_ip4(p->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6;
d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE;
d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE;
d->as4 = p_ok ? p->as4_session : 0;
@@ -208,19 +208,22 @@ bgp_af_caps_cmp(const void *X, const void *Y)
}
-static byte *
-bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
+void
+bgp_prepare_capabilities(struct bgp_conn *conn)
{
struct bgp_proto *p = conn->bgp;
struct bgp_channel *c;
struct bgp_caps *caps;
struct bgp_af_caps *ac;
- uint any_ext_next_hop = 0;
- uint any_add_path = 0;
- byte *data;
- /* Prepare bgp_caps structure */
+ if (!p->cf->capabilities)
+ {
+ /* Just prepare empty local_caps */
+ conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
+ return;
+ }
+ /* Prepare bgp_caps structure */
int n = list_length(&p->p.channels);
caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
conn->local_caps = caps;
@@ -251,10 +254,10 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
ac->ready = 1;
ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop;
- any_ext_next_hop |= ac->ext_next_hop;
+ caps->any_ext_next_hop |= ac->ext_next_hop;
ac->add_path = c->cf->add_path;
- any_add_path |= ac->add_path;
+ caps->any_add_path |= ac->add_path;
if (c->cf->gr_able)
{
@@ -276,7 +279,16 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
/* Sort capability fields by AFI/SAFI */
qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp);
+}
+static byte *
+bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
+{
+ struct bgp_proto *p = conn->bgp;
+ struct bgp_caps *caps = conn->local_caps;
+ struct bgp_af_caps *ac;
+ byte *buf_head = buf;
+ byte *data;
/* Create capability list in buffer */
@@ -301,7 +313,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
*buf++ = 0; /* Capability data length */
}
- if (any_ext_next_hop)
+ if (caps->any_ext_next_hop)
{
*buf++ = 5; /* Capability 5: Support for extended next hop */
*buf++ = 0; /* Capability data length, will be fixed later */
@@ -353,7 +365,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
buf += 4;
}
- if (any_add_path)
+ if (caps->any_add_path)
{
*buf++ = 69; /* Capability 69: Support for ADD-PATH */
*buf++ = 0; /* Capability data length, will be fixed later */
@@ -394,6 +406,8 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
data[-1] = buf - data;
}
+ caps->length = buf - buf_head;
+
return buf;
}
@@ -405,6 +419,8 @@ bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, i
int i, cl;
u32 af;
+ caps->length += len;
+
while (len > 0)
{
if (len < 2 || len < (2 + pos[1]))
@@ -569,6 +585,42 @@ err:
}
static int
+bgp_check_capabilities(struct bgp_conn *conn) /* Returns 1 when every mandatory channel is active and at least one channel is active, 0 otherwise */
+{
+ struct bgp_proto *p = conn->bgp;
+ struct bgp_caps *local = conn->local_caps;
+ struct bgp_caps *remote = conn->remote_caps;
+ struct bgp_channel *c;
+ int count = 0;
+
+ /* This partially overlaps with bgp_conn_enter_established_state(),
+ but it has to run right after the OPEN message is received */
+
+ WALK_LIST(c, p->p.channels)
+ {
+ const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
+ const struct bgp_af_caps *rem = bgp_find_af_caps(remote, c->afi);
+
+ /* Active: local caps are ready and the peer is either ready too, or sent no capabilities at all and the channel is BGP_AF_IPV4 */
+ int active = loc && loc->ready &&
+ ((rem && rem->ready) || (!remote->length && (c->afi == BGP_AF_IPV4)));
+
+ /* A mandatory channel that is not active fails the whole check */
+ if (c->cf->mandatory && !active)
+ return 0;
+
+ if (active)
+ count++;
+ }
+
+ /* We need at least one active channel */
+ if (!count)
+ return 0;
+
+ return 1;
+}
+
+static int
bgp_read_options(struct bgp_conn *conn, byte *pos, int len)
{
struct bgp_proto *p = conn->bgp;
@@ -635,9 +687,6 @@ bgp_create_open(struct bgp_conn *conn, byte *buf)
}
else
{
- /* Prepare empty local_caps */
- conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
-
buf[9] = 0; /* No optional parameters */
return buf + 10;
}
@@ -678,6 +727,10 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
if (!id || (p->is_internal && id == p->local_id))
{ bgp_error(conn, 2, 3, pkt+24, -4); return; }
+ /* RFC 5492 4 - check for required capabilities */
+ if (p->cf->capabilities && !bgp_check_capabilities(conn))
+ { bgp_error(conn, 2, 7, NULL, 0); return; }
+
struct bgp_caps *caps = conn->remote_caps;
if (caps->as4_support)
@@ -687,13 +740,18 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
if ((as4 != asn) && (asn != AS_TRANS))
log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
- if (as4 != p->remote_as)
+ /* When remote ASN is unspecified, it must be external one */
+ if (p->remote_as ? (as4 != p->remote_as) : (as4 == p->local_as))
{ as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; }
+
+ conn->received_as = as4;
}
else
{
- if (asn != p->remote_as)
+ if (p->remote_as ? (asn != p->remote_as) : (asn == p->local_as))
{ bgp_error(conn, 2, 2, pkt+20, 2); return; }
+
+ conn->received_as = asn;
}
/* Check the other connection */
@@ -962,7 +1020,7 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
WITHDRAW(NO_NEXT_HOP);
ip_addr *nh = (void *) a->u.ptr->data;
- ip_addr peer = s->proto->cf->remote_ip;
+ ip_addr peer = s->proto->remote_ip;
uint len = a->u.ptr->length;
/* Forbid zero next hop */
@@ -2280,7 +2338,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis
a->source = RTS_BGP;
a->scope = SCOPE_UNIVERSE;
- a->from = s->proto->cf->remote_ip;
+ a->from = s->proto->remote_ip;
a->eattrs = ea;
c->desc->decode_next_hop(s, nh, nh_len, a);
@@ -2634,6 +2692,12 @@ bgp_fire_tx(struct bgp_conn *conn)
end = bgp_create_notification(conn, pkt);
return bgp_send(conn, PKT_NOTIFICATION, end - buf);
}
+ else if (s & (1 << PKT_OPEN))
+ {
+ conn->packets_to_send &= ~(1 << PKT_OPEN);
+ end = bgp_create_open(conn, pkt);
+ return bgp_send(conn, PKT_OPEN, end - buf);
+ }
else if (s & (1 << PKT_KEEPALIVE))
{
conn->packets_to_send &= ~(1 << PKT_KEEPALIVE);
@@ -2641,12 +2705,6 @@ bgp_fire_tx(struct bgp_conn *conn)
bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH);
}
- else if (s & (1 << PKT_OPEN))
- {
- conn->packets_to_send &= ~(1 << PKT_OPEN);
- end = bgp_create_open(conn, pkt);
- return bgp_send(conn, PKT_OPEN, end - buf);
- }
else while (conn->channels_to_send)
{
c = bgp_get_channel_to_send(p, conn);
@@ -2731,15 +2789,18 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
ev_schedule(conn->tx_ev);
}
-
void
bgp_kick_tx(void *vconn)
{
struct bgp_conn *conn = vconn;
DBG("BGP: kicking TX\n");
- while (bgp_fire_tx(conn) > 0)
+ uint max = 1024; /* Iteration budget: the pre-decrement below allows at most max-1 calls of bgp_fire_tx() */
+ while (--max && (bgp_fire_tx(conn) > 0))
;
+
+ if (!max && !ev_active(conn->tx_ev)) /* Budget ran out: reschedule tx_ev unless it is already active */
+ ev_schedule(conn->tx_ev);
}
void
@@ -2748,8 +2809,12 @@ bgp_tx(sock *sk)
struct bgp_conn *conn = sk->data;
DBG("BGP: TX hook\n");
- while (bgp_fire_tx(conn) > 0)
+ uint max = 1024;
+ while (--max && (bgp_fire_tx(conn) > 0))
;
+
+ if (!max && !ev_active(conn->tx_ev))
+ ev_schedule(conn->tx_ev);
}
@@ -2835,7 +2900,7 @@ bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp)
return 1;
/* Handle proper message */
- if ((msg_len > 128) && (msg_len + 1 > len))
+ if ((msg_len > 255) && (msg_len + 1 > len))
return 0;
/* Some elementary cleanup */