summaryrefslogtreecommitdiff
path: root/proto/bgp/bgp.c
diff options
context:
space:
mode:
Diffstat (limited to 'proto/bgp/bgp.c')
-rw-r--r--proto/bgp/bgp.c1700
1 files changed, 1112 insertions, 588 deletions
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index b99672f5..8da5b0aa 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -2,6 +2,8 @@
* BIRD -- The Border Gateway Protocol
*
* (c) 2000 Martin Mares <mj@ucw.cz>
+ * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2008--2016 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
@@ -9,48 +11,52 @@
/**
* DOC: Border Gateway Protocol
*
- * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
- * connection and most of the interface with BIRD core, |packets.c| handling
+ * The BGP protocol is implemented in three parts: |bgp.c| which takes care of
+ * the connection and most of the interface with BIRD core, |packets.c| handling
* both incoming and outgoing BGP packets and |attrs.c| containing functions for
* manipulation with BGP attribute lists.
*
- * As opposed to the other existing routing daemons, BIRD has a sophisticated core
- * architecture which is able to keep all the information needed by BGP in the
- * primary routing table, therefore no complex data structures like a central
- * BGP table are needed. This increases memory footprint of a BGP router with
- * many connections, but not too much and, which is more important, it makes
- * BGP much easier to implement.
+ * As opposed to the other existing routing daemons, BIRD has a sophisticated
+ * core architecture which is able to keep all the information needed by BGP in
+ * the primary routing table, therefore no complex data structures like a
+ * central BGP table are needed. This increases memory footprint of a BGP router
+ * with many connections, but not too much and, which is more important, it
+ * makes BGP much easier to implement.
*
- * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
- * structure to which are attached individual connections represented by &bgp_connection
- * (usually, there exists only one connection, but during BGP session setup, there
- * can be more of them). The connections are handled according to the BGP state machine
- * defined in the RFC with all the timers and all the parameters configurable.
+ * Each instance of BGP (corresponding to a single BGP peer) is described by a
+ * &bgp_proto structure to which are attached individual connections represented
+ * by &bgp_connection (usually, there exists only one connection, but during BGP
+ * session setup, there can be more of them). The connections are handled
+ * according to the BGP state machine defined in the RFC with all the timers and
+ * all the parameters configurable.
*
- * In incoming direction, we listen on the connection's socket and each time we receive
- * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
- * passes complete packets to bgp_rx_packet() which distributes the packet according
- * to its type.
+ * In incoming direction, we listen on the connection's socket and each time we
+ * receive some input, we pass it to bgp_rx(). It decodes packet headers and the
+ * markers and passes complete packets to bgp_rx_packet() which distributes the
+ * packet according to its type.
*
- * In outgoing direction, we gather all the routing updates and sort them to buckets
- * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
- * of &rta's and a &fib which helps us to find if we already have another route for
- * the same destination queued for sending, so that we can replace it with the new one
- * immediately instead of sending both updates). There also exists a special bucket holding
- * all the route withdrawals which cannot be queued anywhere else as they don't have any
- * attributes. If we have any packet to send (due to either new routes or the connection
- * tracking code wanting to send a Open, Keepalive or Notification message), we call
- * bgp_schedule_packet() which sets the corresponding bit in a @packet_to_send
- * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
- * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
- * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
- * type if we have more data of the same type to send.
+ * In outgoing direction, we gather all the routing updates and sort them to
+ * buckets (&bgp_bucket) according to their attributes (we keep a hash table for
+ * fast comparison of &rta's and a &fib which helps us to find if we already
+ * have another route for the same destination queued for sending, so that we
+ * can replace it with the new one immediately instead of sending both
+ * updates). There also exists a special bucket holding all the route
+ * withdrawals which cannot be queued anywhere else as they don't have any
+ * attributes. If we have any packet to send (due to either new routes or the
+ * connection tracking code wanting to send a Open, Keepalive or Notification
+ * message), we call bgp_schedule_packet() which sets the corresponding bit in a
+ * @packet_to_send bit field in &bgp_conn and as soon as the transmit socket
+ * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the
+ * packet type bits and calls the corresponding bgp_create_xx() functions,
+ * eventually rescheduling the same packet type if we have more data of the same
+ * type to send.
*
- * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
- * of the attribute blocks and translating them to the language of BIRD's extended attributes
- * and bgp_encode_attrs() which does the converse. Both functions are built around a
- * @bgp_attr_table array describing all important characteristics of all known attributes.
- * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
+ * The processing of attributes consists of two functions: bgp_decode_attrs()
+ * for checking of the attribute blocks and translating them to the language of
+ * BIRD's extended attributes and bgp_encode_attrs() which does the
+ * converse. Both functions are built around a @bgp_attr_table array describing
+ * all important characteristics of all known attributes. Unknown transitive
+ * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
*
* BGP protocol implements graceful restart in both restarting (local restart)
* and receiving (neighbor restart) roles. The first is handled mostly by the
@@ -61,10 +67,45 @@
* point of view and therefore maintaining received routes. Routing table
* refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
* stale routes after reestablishment of BGP session during graceful restart.
- */
+ *
+ * Supported standards:
+ * <itemize>
+ * <item> <rfc id="4271"> - Border Gateway Protocol 4 (BGP)
+ * <item> <rfc id="1997"> - BGP Communities Attribute
+ * <item> <rfc id="2385"> - Protection of BGP Sessions via TCP MD5 Signature
+ * <item> <rfc id="2545"> - Use of BGP Multiprotocol Extensions for IPv6
+ * <item> <rfc id="2918"> - Route Refresh Capability
+ * <item> <rfc id="3107"> - Carrying Label Information in BGP
+ * <item> <rfc id="4360"> - BGP Extended Communities Attribute
+ * <item> <rfc id="4364"> - BGP/MPLS IPv4 Virtual Private Networks
+ * <item> <rfc id="4456"> - BGP Route Reflection
+ * <item> <rfc id="4486"> - Subcodes for BGP Cease Notification Message
+ * <item> <rfc id="4659"> - BGP/MPLS IPv6 Virtual Private Networks
+ * <item> <rfc id="4724"> - Graceful Restart Mechanism for BGP
+ * <item> <rfc id="4760"> - Multiprotocol extensions for BGP
+ * <item> <rfc id="4798"> - Connecting IPv6 Islands over IPv4 MPLS
+ * <item> <rfc id="5065"> - AS confederations for BGP
+ * <item> <rfc id="5082"> - Generalized TTL Security Mechanism
+ * <item> <rfc id="5492"> - Capabilities Advertisement with BGP
+ * <item> <rfc id="5549"> - Advertising IPv4 NLRI with an IPv6 Next Hop
+ * <item> <rfc id="5575"> - Dissemination of Flow Specification Rules
+ * <item> <rfc id="5668"> - 4-Octet AS Specific BGP Extended Community
+ * <item> <rfc id="6286"> - AS-Wide Unique BGP Identifier
+ * <item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error
+ * <item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers
+ * <item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP
+ * <item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages
+ * <item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP
+ * <item> <rfc id="7947"> - Internet Exchange BGP Route Server
+ * <item> <rfc id="8092"> - BGP Large Communities Attribute
+ * <item> <rfc id="8203"> - BGP Administrative Shutdown Communication
+ * </itemize>
+*/
#undef LOCAL_DEBUG
+#include <stdlib.h>
+
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
@@ -80,70 +121,150 @@
struct linpool *bgp_linpool; /* Global temporary pool */
-static sock *bgp_listen_sk; /* Global listening socket */
-static int bgp_counter; /* Number of protocol instances using the listening socket */
+struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */
+static list bgp_sockets; /* Global list of listening sockets */
+
-static void bgp_close(struct bgp_proto *p, int apply_md5);
static void bgp_connect(struct bgp_proto *p);
static void bgp_active(struct bgp_proto *p);
-static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
+static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
+static void bgp_listen_sock_err(sock *sk UNUSED, int err);
/**
* bgp_open - open a BGP instance
* @p: BGP instance
*
- * This function allocates and configures shared BGP resources.
- * Should be called as the last step during initialization
- * (when lock is acquired and neighbor is ready).
- * When error, state changed to PS_DOWN, -1 is returned and caller
- * should return immediately.
+ * This function allocates and configures shared BGP resources, mainly listening
+ * sockets. Should be called as the last step during initialization (when lock
+ * is acquired and neighbor is ready). When error, caller should change state to
+ * PS_DOWN and return immediately.
*/
static int
bgp_open(struct bgp_proto *p)
{
- struct config *cfg = p->cf->c.global;
- int errcode;
+ struct bgp_socket *bs = NULL;
+ struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
+ ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
+ (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6);
+ uint port = p->cf->local_port;
- if (!bgp_listen_sk)
- bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
+ /* FIXME: Add some global init? */
+ if (!bgp_linpool)
+ init_list(&bgp_sockets);
+
+ /* We assume that cf->iface is defined iff cf->local_ip is link-local */
- if (!bgp_listen_sk)
+ WALK_LIST(bs, bgp_sockets)
+ if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->iface == ifa) && (bs->sk->sport == port))
{
- errcode = BEM_NO_SOCKET;
- goto err;
+ bs->uc++;
+ p->sock = bs;
+ return 0;
}
- if (!bgp_linpool)
- bgp_linpool = lp_new(&root_pool, 4080);
+ sock *sk = sk_new(proto_pool);
+ sk->type = SK_TCP_PASSIVE;
+ sk->ttl = 255;
+ sk->saddr = addr;
+ sk->sport = port;
+ sk->flags = 0;
+ sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->rbsize = BGP_RX_BUFFER_SIZE;
+ sk->tbsize = BGP_TX_BUFFER_SIZE;
+ sk->rx_hook = bgp_incoming_connection;
+ sk->err_hook = bgp_listen_sock_err;
+
+ if (sk_open(sk) < 0)
+ goto err;
- bgp_counter++;
+ bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
+ bs->sk = sk;
+ bs->uc = 1;
+ p->sock = bs;
- if (p->cf->password)
- if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
- p->cf->iface, p->cf->password, p->cf->setkey) < 0)
- {
- sk_log_error(bgp_listen_sk, p->p.name);
- bgp_close(p, 0);
- errcode = BEM_INVALID_MD5;
- goto err;
- }
+ add_tail(&bgp_sockets, &bs->n);
+
+ if (!bgp_linpool)
+ {
+ bgp_linpool = lp_new_default(proto_pool);
+ bgp_linpool2 = lp_new_default(proto_pool);
+ }
return 0;
err:
- p->p.disabled = 1;
- bgp_store_error(p, NULL, BE_MISC, errcode);
- proto_notify_state(&p->p, PS_DOWN);
+ sk_log_error(sk, p->p.name);
+ log(L_ERR "%s: Cannot open listening socket", p->p.name);
+ rfree(sk);
return -1;
}
+/**
+ * bgp_close - close a BGP instance
+ * @p: BGP instance
+ *
+ * This function frees and deconfigures shared BGP resources.
+ */
+static void
+bgp_close(struct bgp_proto *p)
+{
+ struct bgp_socket *bs = p->sock;
+
+ ASSERT(bs && bs->uc);
+
+ if (--bs->uc)
+ return;
+
+ rfree(bs->sk);
+ rem_node(&bs->n);
+ mb_free(bs);
+
+ if (!EMPTY_LIST(bgp_sockets))
+ return;
+
+ rfree(bgp_linpool);
+ bgp_linpool = NULL;
+
+ rfree(bgp_linpool2);
+ bgp_linpool2 = NULL;
+}
+
+static inline int
+bgp_setup_auth(struct bgp_proto *p, int enable)
+{
+ if (p->cf->password)
+ {
+ int rv = sk_set_md5_auth(p->sock->sk,
+ p->cf->local_ip, p->cf->remote_ip, p->cf->iface,
+ enable ? p->cf->password : NULL, p->cf->setkey);
+
+ if (rv < 0)
+ sk_log_error(p->sock->sk, p->p.name);
+
+ return rv;
+ }
+ else
+ return 0;
+}
+
+static inline struct bgp_channel *
+bgp_find_channel(struct bgp_proto *p, u32 afi)
+{
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ if (c->afi == afi)
+ return c;
+
+ return NULL;
+}
+
static void
bgp_startup(struct bgp_proto *p)
{
BGP_TRACE(D_EVENTS, "Started");
- p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
+ p->start_state = BSS_CONNECT;
if (!p->cf->passive)
bgp_active(p);
@@ -159,70 +280,57 @@ bgp_startup_timeout(timer *t)
static void
bgp_initiate(struct bgp_proto *p)
{
- int rv = bgp_open(p);
- if (rv < 0)
- return;
+ int err_val;
+
+ if (bgp_open(p) < 0)
+ { err_val = BEM_NO_SOCKET; goto err1; }
+
+ if (bgp_setup_auth(p, 1) < 0)
+ { err_val = BEM_INVALID_MD5; goto err2; }
if (p->cf->bfd)
bgp_update_bfd(p, p->cf->bfd);
if (p->startup_delay)
- {
- p->start_state = BSS_DELAY;
- BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
- bgp_start_timer(p->startup_timer, p->startup_delay);
- }
+ {
+ p->start_state = BSS_DELAY;
+ BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
+ bgp_start_timer(p->startup_timer, p->startup_delay);
+ }
else
bgp_startup(p);
-}
-/**
- * bgp_close - close a BGP instance
- * @p: BGP instance
- * @apply_md5: 0 to disable unsetting MD5 auth
- *
- * This function frees and deconfigures shared BGP resources.
- * @apply_md5 is set to 0 when bgp_close is called as a cleanup
- * from failed bgp_open().
- */
-static void
-bgp_close(struct bgp_proto *p, int apply_md5)
-{
- ASSERT(bgp_counter);
- bgp_counter--;
+ return;
- if (p->cf->password && apply_md5)
- if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
- p->cf->iface, NULL, p->cf->setkey) < 0)
- sk_log_error(bgp_listen_sk, p->p.name);
+err2:
+ bgp_close(p);
+err1:
+ p->p.disabled = 1;
+ bgp_store_error(p, NULL, BE_MISC, err_val);
+ proto_notify_state(&p->p, PS_DOWN);
- if (!bgp_counter)
- {
- rfree(bgp_listen_sk);
- bgp_listen_sk = NULL;
- rfree(bgp_linpool);
- bgp_linpool = NULL;
- }
+ return;
}
/**
* bgp_start_timer - start a BGP timer
* @t: timer
- * @value: time to fire (0 to disable the timer)
+ * @value: time (in seconds) to fire (0 to disable the timer)
*
- * This functions calls tm_start() on @t with time @value and the
- * amount of randomization suggested by the BGP standard. Please use
- * it for all BGP timers.
+ * This functions calls tm_start() on @t with time @value and the amount of
+ * randomization suggested by the BGP standard. Please use it for all BGP
+ * timers.
*/
void
-bgp_start_timer(timer *t, int value)
+bgp_start_timer(timer *t, uint value)
{
if (value)
- {
- /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
- t->randomize = value / 4;
- tm_start(t, value - t->randomize);
- }
+ {
+ /* The randomization procedure is specified in RFC 4271 section 10 */
+ btime time = value S;
+ btime randomize = random() % ((time / 4) + 1);
+ tm_start(t, time - randomize);
+ }
else
tm_stop(t);
}
@@ -231,8 +339,8 @@ bgp_start_timer(timer *t, int value)
* bgp_close_conn - close a BGP connection
* @conn: connection to close
*
- * This function takes a connection described by the &bgp_conn structure,
- * closes its socket and frees all resources associated with it.
+ * This function takes a connection described by the &bgp_conn structure, closes
+ * its socket and frees all resources associated with it.
*/
void
bgp_close_conn(struct bgp_conn *conn)
@@ -241,16 +349,22 @@ bgp_close_conn(struct bgp_conn *conn)
DBG("BGP: Closing connection\n");
conn->packets_to_send = 0;
- rfree(conn->connect_retry_timer);
- conn->connect_retry_timer = NULL;
+ conn->channels_to_send = 0;
+ rfree(conn->connect_timer);
+ conn->connect_timer = NULL;
rfree(conn->keepalive_timer);
conn->keepalive_timer = NULL;
rfree(conn->hold_timer);
conn->hold_timer = NULL;
- rfree(conn->sk);
- conn->sk = NULL;
rfree(conn->tx_ev);
conn->tx_ev = NULL;
+ rfree(conn->sk);
+ conn->sk = NULL;
+
+ mb_free(conn->local_caps);
+ conn->local_caps = NULL;
+ mb_free(conn->remote_caps);
+ conn->remote_caps = NULL;
}
@@ -258,9 +372,9 @@ bgp_close_conn(struct bgp_conn *conn)
* bgp_update_startup_delay - update a startup delay
* @p: BGP instance
*
- * This function updates a startup delay that is used to postpone next BGP connect.
- * It also handles disable_after_error and might stop BGP instance when error
- * happened and disable_after_error is on.
+ * This function updates a startup delay that is used to postpone next BGP
+ * connect. It also handles disable_after_error and might stop BGP instance
+ * when error happened and disable_after_error is on.
*
* It should be called when BGP protocol error happened.
*/
@@ -271,17 +385,17 @@ bgp_update_startup_delay(struct bgp_proto *p)
DBG("BGP: Updating startup delay\n");
- if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
+ if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S))
p->startup_delay = 0;
- p->last_proto_error = now;
+ p->last_proto_error = current_time();
if (cf->disable_after_error)
- {
- p->startup_delay = 0;
- p->p.disabled = 1;
- return;
- }
+ {
+ p->startup_delay = 0;
+ p->p.disabled = 1;
+ return;
+ }
if (!p->startup_delay)
p->startup_delay = cf->error_delay_time_min;
@@ -293,29 +407,35 @@ static void
bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len)
{
switch (conn->state)
- {
- case BS_IDLE:
- case BS_CLOSE:
- return;
- case BS_CONNECT:
- case BS_ACTIVE:
- bgp_conn_enter_idle_state(conn);
- return;
- case BS_OPENSENT:
- case BS_OPENCONFIRM:
- case BS_ESTABLISHED:
- bgp_error(conn, 6, subcode, data, len);
- return;
- default:
- bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
- }
+ {
+ case BS_IDLE:
+ case BS_CLOSE:
+ return;
+
+ case BS_CONNECT:
+ case BS_ACTIVE:
+ bgp_conn_enter_idle_state(conn);
+ return;
+
+ case BS_OPENSENT:
+ case BS_OPENCONFIRM:
+ case BS_ESTABLISHED:
+ bgp_error(conn, 6, subcode, data, len);
+ return;
+
+ default:
+ bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
+ }
}
static void
bgp_down(struct bgp_proto *p)
{
if (p->start_state > BSS_PREPARE)
- bgp_close(p, 1);
+ {
+ bgp_setup_auth(p, 0);
+ bgp_close(p);
+ }
BGP_TRACE(D_EVENTS, "Down");
proto_notify_state(&p->p, PS_DOWN);
@@ -327,15 +447,15 @@ bgp_decision(void *vp)
struct bgp_proto *p = vp;
DBG("BGP: Decision start\n");
- if ((p->p.proto_state == PS_START)
- && (p->outgoing_conn.state == BS_IDLE)
- && (p->incoming_conn.state != BS_OPENCONFIRM)
- && (!p->cf->passive))
+ if ((p->p.proto_state == PS_START) &&
+ (p->outgoing_conn.state == BS_IDLE) &&
+ (p->incoming_conn.state != BS_OPENCONFIRM) &&
+ !p->cf->passive)
bgp_active(p);
- if ((p->p.proto_state == PS_STOP)
- && (p->outgoing_conn.state == BS_IDLE)
- && (p->incoming_conn.state == BS_IDLE))
+ if ((p->p.proto_state == PS_STOP) &&
+ (p->outgoing_conn.state == BS_IDLE) &&
+ (p->incoming_conn.state == BS_IDLE))
bgp_down(p);
}
@@ -349,7 +469,7 @@ bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len)
}
static inline void
-bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
+bgp_conn_set_state(struct bgp_conn *conn, uint new_state)
{
if (conn->bgp->p.mrtdump & MD_STATES)
mrt_dump_bgp_state_change(conn, conn->state, new_state);
@@ -364,13 +484,17 @@ bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
bgp_conn_set_state(conn, BS_OPENCONFIRM);
}
+static const struct bgp_af_caps dummy_af_caps = { };
+
void
bgp_conn_enter_established_state(struct bgp_conn *conn)
{
struct bgp_proto *p = conn->bgp;
+ struct bgp_caps *local = conn->local_caps;
+ struct bgp_caps *peer = conn->remote_caps;
+ struct bgp_channel *c;
BGP_TRACE(D_EVENTS, "BGP session established");
- DBG("BGP: UP!!!\n");
/* For multi-hop BGP sessions */
if (ipa_zero(p->source_addr))
@@ -381,30 +505,92 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
p->conn = conn;
p->last_error_class = 0;
p->last_error_code = 0;
- p->feed_state = BFS_NONE;
- p->load_state = BFS_NONE;
- bgp_init_bucket_table(p);
- bgp_init_prefix_table(p, 8);
- int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
+ p->as4_session = conn->as4_session;
+
+ p->route_refresh = peer->route_refresh;
+ p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh;
- if (p->p.gr_recovery && !peer_gr_ready)
- proto_graceful_restart_unlock(&p->p);
+ /* Whether we may handle possible GR of peer (it has some AF GR-able) */
+ p->gr_ready = 0; /* Updated later */
- if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
- p->p.gr_wait = 1;
+ /* Whether peer is ready to handle our GR recovery */
+ int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART);
- if (p->gr_active)
+ if (p->gr_active_num)
tm_stop(p->gr_timer);
- if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
- bgp_graceful_restart_done(p);
+ /* Number of active channels */
+ int num = 0;
- /* GR capability implies that neighbor will send End-of-RIB */
- if (conn->peer_gr_aware)
- p->load_state = BFS_LOADING;
+ WALK_LIST(c, p->p.channels)
+ {
+ const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
+ const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
- /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */
+ /* Ignore AFIs that were not announced in multiprotocol capability */
+ if (!loc || !loc->ready)
+ loc = &dummy_af_caps;
+
+ if (!rem || !rem->ready)
+ rem = &dummy_af_caps;
+
+ int active = loc->ready && rem->ready;
+ c->c.disabled = !active;
+ c->c.reloadable = p->route_refresh;
+
+ c->index = active ? num++ : 0;
+
+ c->feed_state = BFS_NONE;
+ c->load_state = BFS_NONE;
+
+ /* Channels where peer may do GR */
+ c->gr_ready = active && local->gr_aware && rem->gr_able;
+ p->gr_ready = p->gr_ready || c->gr_ready;
+
+ /* Channels not able to recover gracefully */
+ if (p->p.gr_recovery && (!active || !peer_gr_ready))
+ channel_graceful_restart_unlock(&c->c);
+
+ /* Channels waiting for local convergence */
+ if (p->p.gr_recovery && loc->gr_able && peer_gr_ready)
+ c->c.gr_wait = 1;
+
+ /* Channels where peer is not able to recover gracefully */
+ if (c->gr_active && ! (c->gr_ready && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
+ bgp_graceful_restart_done(c);
+
+ /* GR capability implies that neighbor will send End-of-RIB */
+ if (peer->gr_aware)
+ c->load_state = BFS_LOADING;
+
+ c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop);
+ c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
+ c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
+
+ /* Update RA mode */
+ if (c->add_path_tx)
+ c->c.ra_mode = RA_ANY;
+ else if (c->cf->secondary)
+ c->c.ra_mode = RA_ACCEPTED;
+ else
+ c->c.ra_mode = RA_OPTIMAL;
+ }
+
+ p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
+ p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *));
+ p->channel_count = num;
+
+ WALK_LIST(c, p->p.channels)
+ {
+ if (c->c.disabled)
+ continue;
+
+ p->afi_map[c->index] = c->afi;
+ p->channel_map[c->index] = c;
+ }
+
+ /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */
bgp_conn_set_state(conn, BS_ESTABLISHED);
proto_notify_state(&p->p, PS_UP);
@@ -416,9 +602,6 @@ bgp_conn_leave_established_state(struct bgp_proto *p)
BGP_TRACE(D_EVENTS, "BGP session closed");
p->conn = NULL;
- bgp_free_prefix_table(p);
- bgp_free_bucket_table(p);
-
if (p->p.proto_state == PS_UP)
bgp_stop(p, 0, NULL, 0);
}
@@ -471,34 +654,71 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
- p->gr_active ? " - already pending" : "");
- proto_notify_state(&p->p, PS_START);
+ p->gr_active_num ? " - already pending" : "");
+
+ p->gr_active_num = 0;
+
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ {
+ /* FIXME: perhaps check for channel state instead of disabled flag? */
+ if (c->c.disabled)
+ continue;
+
+ if (c->gr_ready)
+ {
+ if (c->gr_active)
+ rt_refresh_end(c->c.table, &c->c);
+
+ c->gr_active = 1;
+ p->gr_active_num++;
+ rt_refresh_begin(c->c.table, &c->c);
+ }
+ else
+ {
+ /* Just flush the routes */
+ rt_refresh_begin(c->c.table, &c->c);
+ rt_refresh_end(c->c.table, &c->c);
+ }
+
+ /* Reset bucket and prefix tables */
+ bgp_free_bucket_table(c);
+ bgp_free_prefix_table(c);
+ bgp_init_bucket_table(c);
+ bgp_init_prefix_table(c);
+ c->packets_to_send = 0;
+ }
- if (p->gr_active)
- rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
+ /* p->gr_ready -> at least one active channel is c->gr_ready */
+ ASSERT(p->gr_active_num > 0);
- p->gr_active = 1;
- bgp_start_timer(p->gr_timer, p->conn->peer_gr_time);
- rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
+ proto_notify_state(&p->p, PS_START);
+ bgp_start_timer(p->gr_timer, p->conn->remote_caps->gr_time);
}
/**
* bgp_graceful_restart_done - finish active BGP graceful restart
- * @p: BGP instance
+ * @c: BGP channel
*
* This function is called when the active BGP graceful restart of the neighbor
- * should be finished - either successfully (the neighbor sends all paths and
- * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
- * not support BGP graceful restart on the new session). The function ends
- * routing table refresh cycle and stops BGP restart timer.
+ * should be finished for channel @c - either successfully (the neighbor sends
+ * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or
+ * unsuccessfully (the neighbor does not support BGP graceful restart on the new
+ * session). The function ends the routing table refresh cycle.
*/
void
-bgp_graceful_restart_done(struct bgp_proto *p)
+bgp_graceful_restart_done(struct bgp_channel *c)
{
- BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
- p->gr_active = 0;
- tm_stop(p->gr_timer);
- rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
+ struct bgp_proto *p = (void *) c->c.proto;
+
+ ASSERT(c->gr_active);
+ c->gr_active = 0;
+ p->gr_active_num--;
+
+ if (!p->gr_active_num)
+ BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
+
+ rt_refresh_end(c->c.table, &c->c);
}
/**
@@ -522,7 +742,7 @@ bgp_graceful_restart_timeout(timer *t)
/**
* bgp_refresh_begin - start incoming enhanced route refresh sequence
- * @p: BGP instance
+ * @c: BGP channel
*
* This function is called when an incoming enhanced route refresh sequence is
* started by the neighbor, demarcated by the BoRR packet. The function updates
@@ -531,18 +751,20 @@ bgp_graceful_restart_timeout(timer *t)
* ensure that these two sequences do not overlap.
*/
void
-bgp_refresh_begin(struct bgp_proto *p)
+bgp_refresh_begin(struct bgp_channel *c)
{
- if (p->load_state == BFS_LOADING)
- { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
+ struct bgp_proto *p = (void *) c->c.proto;
- p->load_state = BFS_REFRESHING;
- rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
+ if (c->load_state == BFS_LOADING)
+ { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
+
+ c->load_state = BFS_REFRESHING;
+ rt_refresh_begin(c->c.table, &c->c);
}
/**
* bgp_refresh_end - finish incoming enhanced route refresh sequence
- * @p: BGP instance
+ * @c: BGP channel
*
* This function is called when an incoming enhanced route refresh sequence is
* finished by the neighbor, demarcated by the EoRR packet. The function updates
@@ -550,39 +772,26 @@ bgp_refresh_begin(struct bgp_proto *p)
* during the sequence are removed by the nest.
*/
void
-bgp_refresh_end(struct bgp_proto *p)
+bgp_refresh_end(struct bgp_channel *c)
{
- if (p->load_state != BFS_REFRESHING)
- { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
+ struct bgp_proto *p = (void *) c->c.proto;
- p->load_state = BFS_NONE;
- rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
+ if (c->load_state != BFS_REFRESHING)
+ { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
+
+ c->load_state = BFS_NONE;
+ rt_refresh_end(c->c.table, &c->c);
}
static void
bgp_send_open(struct bgp_conn *conn)
{
- conn->start_state = conn->bgp->start_state;
-
- // Default values, possibly changed by receiving capabilities.
- conn->advertised_as = 0;
- conn->peer_refresh_support = 0;
- conn->peer_as4_support = 0;
- conn->peer_add_path = 0;
- conn->peer_enhanced_refresh_support = 0;
- conn->peer_gr_aware = 0;
- conn->peer_gr_able = 0;
- conn->peer_gr_time = 0;
- conn->peer_gr_flags = 0;
- conn->peer_gr_aflags = 0;
- conn->peer_ext_messages_support = 0;
-
DBG("BGP: Sending open\n");
conn->sk->rx_hook = bgp_rx;
conn->sk->tx_hook = bgp_tx;
- tm_stop(conn->connect_retry_timer);
- bgp_schedule_packet(conn, PKT_OPEN);
+ tm_stop(conn->connect_timer);
+ bgp_schedule_packet(conn, NULL, PKT_OPEN);
bgp_conn_set_state(conn, BS_OPENSENT);
bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
}
@@ -605,10 +814,10 @@ bgp_connect_timeout(timer *t)
DBG("BGP: connect_timeout\n");
if (p->p.proto_state == PS_START)
- {
- bgp_close_conn(conn);
- bgp_connect(p);
- }
+ {
+ bgp_close_conn(conn);
+ bgp_connect(p);
+ }
else
bgp_conn_enter_idle_state(conn);
}
@@ -672,7 +881,7 @@ bgp_keepalive_timeout(timer *t)
struct bgp_conn *conn = t->data;
DBG("BGP: Keepalive timer\n");
- bgp_schedule_packet(conn, PKT_KEEPALIVE);
+ bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
/* Kick TX a bit faster */
if (ev_active(conn->tx_ev))
@@ -682,21 +891,18 @@ bgp_keepalive_timeout(timer *t)
static void
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
{
- timer *t;
-
conn->sk = NULL;
conn->bgp = p;
+
conn->packets_to_send = 0;
+ conn->channels_to_send = 0;
+ conn->last_channel = 0;
+ conn->last_channel_count = 0;
+
+ conn->connect_timer = tm_new_init(p->p.pool, bgp_connect_timeout, conn, 0, 0);
+ conn->hold_timer = tm_new_init(p->p.pool, bgp_hold_timeout, conn, 0, 0);
+ conn->keepalive_timer = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0);
- t = conn->connect_retry_timer = tm_new(p->p.pool);
- t->hook = bgp_connect_timeout;
- t->data = conn;
- t = conn->hold_timer = tm_new(p->p.pool);
- t->hook = bgp_hold_timeout;
- t->data = conn;
- t = conn->keepalive_timer = tm_new(p->p.pool);
- t->hook = bgp_keepalive_timeout;
- t->data = conn;
conn->tx_ev = ev_new(p->p.pool);
conn->tx_ev->hook = bgp_kick_tx;
conn->tx_ev->data = conn;
@@ -720,7 +926,7 @@ bgp_active(struct bgp_proto *p)
BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
bgp_setup_conn(p, conn);
bgp_conn_set_state(conn, BS_ACTIVE);
- bgp_start_timer(conn->connect_retry_timer, delay);
+ bgp_start_timer(conn->connect_timer, delay);
}
/**
@@ -734,12 +940,11 @@ bgp_active(struct bgp_proto *p)
static void
bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */
{
- sock *s;
struct bgp_conn *conn = &p->outgoing_conn;
int hops = p->cf->multihop ? : 1;
DBG("BGP: Connecting\n");
- s = sk_new(p->p.pool);
+ sock *s = sk_new(p->p.pool);
s->type = SK_TCP_ACTIVE;
s->saddr = p->source_addr;
s->daddr = p->cf->remote_ip;
@@ -767,10 +972,10 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
goto err;
DBG("BGP: Waiting for connect success\n");
- bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
+ bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time);
return;
- err:
+err:
sk_log_error(s, p->p.name);
bgp_sock_err(s, 0);
return;
@@ -784,16 +989,15 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
static struct bgp_proto *
bgp_find_proto(sock *sk)
{
- struct proto_config *pc;
+ struct bgp_proto *p;
- WALK_LIST(pc, config->protos)
- if ((pc->protocol == &proto_bgp) && pc->proto)
- {
- struct bgp_proto *p = (struct bgp_proto *) pc->proto;
- if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
- (!p->cf->iface || (p->cf->iface == sk->iface)))
- return p;
- }
+ WALK_LIST(p, proto_list)
+ if ((p->p.proto == &proto_bgp) &&
+ ipa_equal(p->cf->remote_ip, sk->daddr) &&
+ (!p->cf->iface || (p->cf->iface == sk->iface)) &&
+ (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)) &&
+ (p->cf->local_port == sk->sport))
+ return p;
return NULL;
}
@@ -819,12 +1023,12 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
p = bgp_find_proto(sk);
if (!p)
- {
- log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
- sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
- rfree(sk);
- return 0;
- }
+ {
+ log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
+ sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
+ rfree(sk);
+ return 0;
+ }
/*
* BIRD should keep multiple incoming connections in OpenSent state (for
@@ -837,26 +1041,26 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
(p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
- {
- bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
- bgp_handle_graceful_restart(p);
- bgp_conn_enter_idle_state(p->conn);
- acc = 1;
-
- /* There might be separate incoming connection in OpenSent state */
- if (p->incoming_conn.state > BS_ACTIVE)
- bgp_close_conn(&p->incoming_conn);
- }
+ {
+ bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
+ bgp_handle_graceful_restart(p);
+ bgp_conn_enter_idle_state(p->conn);
+ acc = 1;
+
+ /* There might be separate incoming connection in OpenSent state */
+ if (p->incoming_conn.state > BS_ACTIVE)
+ bgp_close_conn(&p->incoming_conn);
+ }
BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
sk->dport, acc ? "accepted" : "rejected");
if (!acc)
- {
- rfree(sk);
- return 0;
- }
+ {
+ rfree(sk);
+ return 0;
+ }
hops = p->cf->multihop ? : 1;
@@ -868,11 +1072,11 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
goto err;
if (p->cf->enable_extended_messages)
- {
- sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
- sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
- sk_reallocate(sk);
- }
+ {
+ sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
+ sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
+ sk_reallocate(sk);
+ }
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, sk);
@@ -895,34 +1099,6 @@ bgp_listen_sock_err(sock *sk UNUSED, int err)
log(L_ERR "BGP: Error on listening socket: %M", err);
}
-static sock *
-bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
-{
- sock *s = sk_new(&root_pool);
- DBG("BGP: Creating listening socket\n");
- s->type = SK_TCP_PASSIVE;
- s->ttl = 255;
- s->saddr = addr;
- s->sport = port ? port : BGP_PORT;
- s->flags = flags ? 0 : SKF_V6ONLY;
- s->tos = IP_PREC_INTERNET_CONTROL;
- s->rbsize = BGP_RX_BUFFER_SIZE;
- s->tbsize = BGP_TX_BUFFER_SIZE;
- s->rx_hook = bgp_incoming_connection;
- s->err_hook = bgp_listen_sock_err;
-
- if (sk_open(s) < 0)
- goto err;
-
- return s;
-
- err:
- sk_log_error(s, "BGP");
- log(L_ERR "BGP: Cannot open listening socket");
- rfree(s);
- return NULL;
-}
-
static void
bgp_start_neighbor(struct bgp_proto *p)
{
@@ -931,23 +1107,10 @@ bgp_start_neighbor(struct bgp_proto *p)
if (ipa_zero(p->source_addr))
p->source_addr = p->neigh->ifa->ip;
-#ifdef IPV6
- {
- struct ifa *a;
- p->local_link = IPA_NONE;
- WALK_LIST(a, p->neigh->iface->addrs)
- if (a->scope == SCOPE_LINK)
- {
- p->local_link = a->ip;
- break;
- }
-
- if (! ipa_nonzero(p->local_link))
- log(L_WARN "%s: Missing link local address on interface %s", p->p.name, p->neigh->iface->name);
-
- DBG("BGP: Selected link-level address %I\n", p->local_link);
- }
-#endif
+ if (ipa_is_link_local(p->source_addr))
+ p->link_addr = p->source_addr;
+ else if (p->neigh->iface->llv6)
+ p->link_addr = p->neigh->iface->llv6->ip;
bgp_initiate(p);
}
@@ -967,34 +1130,34 @@ bgp_neigh_notify(neighbor *n)
int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
if (n->scope <= 0)
+ {
+ if (!prepare)
{
- if (!prepare)
- {
- BGP_TRACE(D_EVENTS, "Neighbor lost");
- bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
- /* Perhaps also run bgp_update_startup_delay(p)? */
- bgp_stop(p, 0, NULL, 0);
- }
+ BGP_TRACE(D_EVENTS, "Neighbor lost");
+ bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
+ /* Perhaps also run bgp_update_startup_delay(p)? */
+ bgp_stop(p, 0, NULL, 0);
}
+ }
else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
+ {
+ if (!prepare)
{
- if (!prepare)
- {
- BGP_TRACE(D_EVENTS, "Link down");
- bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
- if (ps == PS_UP)
- bgp_update_startup_delay(p);
- bgp_stop(p, 0, NULL, 0);
- }
+ BGP_TRACE(D_EVENTS, "Link down");
+ bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
+ if (ps == PS_UP)
+ bgp_update_startup_delay(p);
+ bgp_stop(p, 0, NULL, 0);
}
+ }
else
+ {
+ if (prepare)
{
- if (prepare)
- {
- BGP_TRACE(D_EVENTS, "Neighbor ready");
- bgp_start_neighbor(p);
- }
+ BGP_TRACE(D_EVENTS, "Neighbor ready");
+ bgp_start_neighbor(p);
}
+ }
}
static void
@@ -1004,13 +1167,13 @@ bgp_bfd_notify(struct bfd_request *req)
int ps = p->p.proto_state;
if (req->down && ((ps == PS_START) || (ps == PS_UP)))
- {
- BGP_TRACE(D_EVENTS, "BFD session down");
- bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
- if (ps == PS_UP)
- bgp_update_startup_delay(p);
- bgp_stop(p, 0, NULL, 0);
- }
+ {
+ BGP_TRACE(D_EVENTS, "BFD session down");
+ bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
+ if (ps == PS_UP)
+ bgp_update_startup_delay(p);
+ bgp_stop(p, 0, NULL, 0);
+ }
}
static void
@@ -1022,71 +1185,72 @@ bgp_update_bfd(struct bgp_proto *p, int use_bfd)
bgp_bfd_notify, p);
if (!use_bfd && p->bfd_req)
- {
- rfree(p->bfd_req);
- p->bfd_req = NULL;
- }
+ {
+ rfree(p->bfd_req);
+ p->bfd_req = NULL;
+ }
}
-static int
-bgp_reload_routes(struct proto *P)
+static void
+bgp_reload_routes(struct channel *C)
{
- struct bgp_proto *p = (struct bgp_proto *) P;
- if (!p->conn || !p->conn->peer_refresh_support)
- return 0;
+ struct bgp_proto *p = (void *) C->proto;
+ struct bgp_channel *c = (void *) C;
- bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
- return 1;
+ ASSERT(p->conn && p->route_refresh);
+
+ bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
}
static void
-bgp_feed_begin(struct proto *P, int initial)
+bgp_feed_begin(struct channel *C, int initial)
{
- struct bgp_proto *p = (struct bgp_proto *) P;
+ struct bgp_proto *p = (void *) C->proto;
+ struct bgp_channel *c = (void *) C;
/* This should not happen */
if (!p->conn)
return;
if (initial && p->cf->gr_mode)
- p->feed_state = BFS_LOADING;
+ c->feed_state = BFS_LOADING;
/* It is refeed and both sides support enhanced route refresh */
- if (!initial && p->cf->enable_refresh &&
- p->conn->peer_enhanced_refresh_support)
- {
- /* BoRR must not be sent before End-of-RIB */
- if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
- return;
+ if (!initial && p->enhanced_refresh)
+ {
+ /* BoRR must not be sent before End-of-RIB */
+ if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED)
+ return;
- p->feed_state = BFS_REFRESHING;
- bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
- }
+ c->feed_state = BFS_REFRESHING;
+ bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH);
+ }
}
static void
-bgp_feed_end(struct proto *P)
+bgp_feed_end(struct channel *C)
{
- struct bgp_proto *p = (struct bgp_proto *) P;
+ struct bgp_proto *p = (void *) C->proto;
+ struct bgp_channel *c = (void *) C;
/* This should not happen */
if (!p->conn)
return;
/* Non-demarcated feed ended, nothing to do */
- if (p->feed_state == BFS_NONE)
+ if (c->feed_state == BFS_NONE)
return;
/* Schedule End-of-RIB packet */
- if (p->feed_state == BFS_LOADING)
- p->feed_state = BFS_LOADED;
+ if (c->feed_state == BFS_LOADING)
+ c->feed_state = BFS_LOADED;
/* Schedule EoRR packet */
- if (p->feed_state == BFS_REFRESHING)
- p->feed_state = BFS_REFRESHED;
+ if (c->feed_state == BFS_REFRESHING)
+ c->feed_state = BFS_REFRESHED;
/* Kick TX hook */
- bgp_schedule_packet(p->conn, PKT_UPDATE);
+ bgp_schedule_packet(p->conn, c, PKT_UPDATE);
}
@@ -1097,30 +1261,30 @@ bgp_start_locked(struct object_lock *lock)
struct bgp_config *cf = p->cf;
if (p->p.proto_state != PS_START)
- {
- DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
- return;
- }
+ {
+ DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
+ return;
+ }
DBG("BGP: Got lock\n");
if (cf->multihop)
- {
- /* Multi-hop sessions do not use neighbor entries */
- bgp_initiate(p);
- return;
- }
+ {
+ /* Multi-hop sessions do not use neighbor entries */
+ bgp_initiate(p);
+ return;
+ }
neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
if (!n)
- {
- log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
- /* As we do not start yet, we can just disable protocol */
- p->p.disabled = 1;
- bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
- proto_notify_state(&p->p, PS_DOWN);
- return;
- }
+ {
+ log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
+ /* As we do not start yet, we can just disable protocol */
+ p->p.disabled = 1;
+ bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
+ proto_notify_state(&p->p, PS_DOWN);
+ return;
+ }
p->neigh = n;
@@ -1145,36 +1309,34 @@ bgp_start(struct proto *P)
p->neigh = NULL;
p->bfd_req = NULL;
p->gr_ready = 0;
- p->gr_active = 0;
-
- rt_lock_table(p->igp_table);
+ p->gr_active_num = 0;
p->event = ev_new(p->p.pool);
p->event->hook = bgp_decision;
p->event->data = p;
- p->startup_timer = tm_new(p->p.pool);
- p->startup_timer->hook = bgp_startup_timeout;
- p->startup_timer->data = p;
-
- p->gr_timer = tm_new(p->p.pool);
- p->gr_timer->hook = bgp_graceful_restart_timeout;
- p->gr_timer->data = p;
+ p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
+ p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
p->local_id = proto_get_router_id(P->cf);
if (p->rr_client)
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
p->remote_id = 0;
- p->source_addr = p->cf->source_addr;
+ p->source_addr = p->cf->local_ip;
+ p->link_addr = IPA_NONE;
+ /* Lock all channels when in GR recovery mode */
if (p->p.gr_recovery && p->cf->gr_mode)
- proto_graceful_restart_lock(P);
+ {
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ channel_graceful_restart_lock(&c->c);
+ }
/*
- * Before attempting to create the connection, we need to lock the
- * port, so that are sure we're the only instance attempting to talk
- * with that neighbor.
+ * Before attempting to create the connection, we need to lock the port,
+ * so that we are the only instance attempting to talk with that neighbor.
*/
lock = p->lock = olock_new(P->pool);
@@ -1205,45 +1367,45 @@ bgp_shutdown(struct proto *P)
BGP_TRACE(D_EVENTS, "Shutdown requested");
switch (P->down_code)
- {
- case PDC_CF_REMOVE:
- case PDC_CF_DISABLE:
- subcode = 3; // Errcode 6, 3 - peer de-configured
- break;
-
- case PDC_CF_RESTART:
- subcode = 6; // Errcode 6, 6 - other configuration change
- break;
-
- case PDC_CMD_DISABLE:
- case PDC_CMD_SHUTDOWN:
- subcode = 2; // Errcode 6, 2 - administrative shutdown
- message = P->message;
- break;
-
- case PDC_CMD_RESTART:
- subcode = 4; // Errcode 6, 4 - administrative reset
- message = P->message;
- break;
-
- case PDC_RX_LIMIT_HIT:
- case PDC_IN_LIMIT_HIT:
- subcode = 1; // Errcode 6, 1 - max number of prefixes reached
- /* log message for compatibility */
- log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
- goto limit;
-
- case PDC_OUT_LIMIT_HIT:
- subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
-
- limit:
- bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
- if (proto_restart)
- bgp_update_startup_delay(p);
- else
- p->startup_delay = 0;
- goto done;
- }
+ {
+ case PDC_CF_REMOVE:
+ case PDC_CF_DISABLE:
+ subcode = 3; // Errcode 6, 3 - peer de-configured
+ break;
+
+ case PDC_CF_RESTART:
+ subcode = 6; // Errcode 6, 6 - other configuration change
+ break;
+
+ case PDC_CMD_DISABLE:
+ case PDC_CMD_SHUTDOWN:
+ subcode = 2; // Errcode 6, 2 - administrative shutdown
+ message = P->message;
+ break;
+
+ case PDC_CMD_RESTART:
+ subcode = 4; // Errcode 6, 4 - administrative reset
+ message = P->message;
+ break;
+
+ case PDC_RX_LIMIT_HIT:
+ case PDC_IN_LIMIT_HIT:
+ subcode = 1; // Errcode 6, 1 - max number of prefixes reached
+ /* log message for compatibility */
+ log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
+ goto limit;
+
+ case PDC_OUT_LIMIT_HIT:
+ subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
+
+ limit:
+ bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
+ if (proto_restart)
+ bgp_update_startup_delay(p);
+ else
+ p->startup_delay = 0;
+ goto done;
+ }
bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
p->startup_delay = 0;
@@ -1267,27 +1429,13 @@ done:
return p->p.proto_state;
}
-static void
-bgp_cleanup(struct proto *P)
-{
- struct bgp_proto *p = (struct bgp_proto *) P;
- rt_unlock_table(p->igp_table);
-}
-
-static rtable *
-get_igp_table(struct bgp_config *cf)
-{
- return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
-}
-
static struct proto *
-bgp_init(struct proto_config *C)
+bgp_init(struct proto_config *CF)
{
- struct proto *P = proto_new(C, sizeof(struct bgp_proto));
- struct bgp_config *c = (struct bgp_config *) C;
+ struct proto *P = proto_new(CF);
struct bgp_proto *p = (struct bgp_proto *) P;
+ struct bgp_config *cf = (struct bgp_config *) CF;
- P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
P->rt_notify = bgp_rt_notify;
P->import_control = bgp_import_control;
P->neigh_notify = bgp_neigh_notify;
@@ -1296,102 +1444,293 @@ bgp_init(struct proto_config *C)
P->feed_end = bgp_feed_end;
P->rte_better = bgp_rte_better;
P->rte_mergable = bgp_rte_mergable;
- P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
-
- p->cf = c;
- p->local_as = c->local_as;
- p->remote_as = c->remote_as;
- p->is_internal = (c->local_as == c->remote_as);
- p->rs_client = c->rs_client;
- p->rr_client = c->rr_client;
- p->igp_table = get_igp_table(c);
+ P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
+
+ p->cf = cf;
+ p->local_as = cf->local_as;
+ p->remote_as = cf->remote_as;
+ p->public_as = cf->local_as;
+ p->is_internal = (cf->local_as == cf->remote_as);
+ p->is_interior = p->is_internal || cf->confederation_member;
+ p->rs_client = cf->rs_client;
+ p->rr_client = cf->rr_client;
+
+ /* Confederation ID is used for truly external peers */
+ if (cf->confederation && !p->is_interior)
+ p->public_as = cf->confederation;
+
+ /* Add all channels */
+ struct bgp_channel_config *cc;
+ WALK_LIST(cc, CF->channels)
+ proto_add_channel(P, &cc->c);
return P;
}
+static void
+bgp_channel_init(struct channel *C, struct channel_config *CF)
+{
+ struct bgp_channel *c = (void *) C;
+ struct bgp_channel_config *cf = (void *) CF;
+
+ c->cf = cf;
+ c->afi = cf->afi;
+ c->desc = cf->desc;
+
+ if (cf->igp_table_ip4)
+ c->igp_table_ip4 = cf->igp_table_ip4->table;
+
+ if (cf->igp_table_ip6)
+ c->igp_table_ip6 = cf->igp_table_ip6->table;
+}
+
+static int
+bgp_channel_start(struct channel *C)
+{
+ struct bgp_proto *p = (void *) C->proto;
+ struct bgp_channel *c = (void *) C;
+ ip_addr src = p->source_addr;
+
+ if (c->igp_table_ip4)
+ rt_lock_table(c->igp_table_ip4);
+
+ if (c->igp_table_ip6)
+ rt_lock_table(c->igp_table_ip6);
+
+ c->pool = p->p.pool; // XXXX
+ bgp_init_bucket_table(c);
+ bgp_init_prefix_table(c);
+
+ c->next_hop_addr = c->cf->next_hop_addr;
+ c->link_addr = IPA_NONE;
+ c->packets_to_send = 0;
+
+ /* Try to use source address as next hop address */
+ if (ipa_zero(c->next_hop_addr))
+ {
+ if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop))
+ c->next_hop_addr = src;
+
+ if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop))
+ c->next_hop_addr = src;
+ }
+
+ /* Use preferred addresses associated with interface / source address */
+ if (ipa_zero(c->next_hop_addr))
+ {
+ /* We know the iface for single-hop, we make lookup for multihop */
+ struct neighbor *nbr = p->neigh ?: neigh_find2(&p->p, &src, NULL, 0);
+ struct iface *iface = nbr ? nbr->iface : NULL;
+
+ if (bgp_channel_is_ipv4(c) && iface && iface->addr4)
+ c->next_hop_addr = iface->addr4->ip;
+
+ if (bgp_channel_is_ipv6(c) && iface && iface->addr6)
+ c->next_hop_addr = iface->addr6->ip;
+ }
+
+ /* Exit if no feasible next hop address is found */
+ if (ipa_zero(c->next_hop_addr))
+ {
+ log(L_WARN "%s: Missing next hop address", p->p.name);
+ return 0;
+ }
+
+ /* Set link-local address for IPv6 single-hop BGP */
+ if (ipa_is_ip6(c->next_hop_addr) && p->neigh)
+ {
+ c->link_addr = p->link_addr;
+
+ if (ipa_zero(c->link_addr))
+ log(L_WARN "%s: Missing link-local address", p->p.name);
+ }
+
+ /* Link local address is already in c->link_addr */
+ if (ipa_is_link_local(c->next_hop_addr))
+ c->next_hop_addr = IPA_NONE;
+
+ return 0; /* XXXX: Currently undefined */
+}
+
+static void
+bgp_channel_shutdown(struct channel *C)
+{
+ struct bgp_channel *c = (void *) C;
+
+ c->next_hop_addr = IPA_NONE;
+ c->link_addr = IPA_NONE;
+ c->packets_to_send = 0;
+}
+
+static void
+bgp_channel_cleanup(struct channel *C)
+{
+ struct bgp_channel *c = (void *) C;
+
+ if (c->igp_table_ip4)
+ rt_unlock_table(c->igp_table_ip4);
+
+ if (c->igp_table_ip6)
+ rt_unlock_table(c->igp_table_ip6);
+}
+
+static inline struct bgp_channel_config *
+bgp_find_channel_config(struct bgp_config *cf, u32 afi)
+{
+ struct bgp_channel_config *cc;
+
+ WALK_LIST(cc, cf->c.channels)
+ if (cc->afi == afi)
+ return cc;
+
+ return NULL;
+}
+
+struct rtable_config *
+bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type)
+{
+ struct bgp_channel_config *cc2;
+ struct rtable_config *tab;
+
+ /* First, try table connected by the channel */
+ if (cc->c.table->addr_type == type)
+ return cc->c.table;
+
+ /* Find paired channel with the same SAFI but the other AFI */
+ u32 afi2 = cc->afi ^ 0x30000;
+ cc2 = bgp_find_channel_config(cf, afi2);
+
+ /* Second, try IGP table configured in the paired channel */
+ if (cc2 && (tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6))
+ return tab;
+
+ /* Third, try table connected by the paired channel */
+ if (cc2 && (cc2->c.table->addr_type == type))
+ return cc2->c.table;
+
+ /* Last, try default table of given type */
+ if (tab = cf->c.global->def_tables[type])
+ return tab;
+
+ cf_error("Undefined IGP table");
+}
+
void
-bgp_check_config(struct bgp_config *c)
+bgp_postconfig(struct proto_config *CF)
{
- int internal = (c->local_as == c->remote_as);
+ struct bgp_config *cf = (void *) CF;
+ int internal = (cf->local_as == cf->remote_as);
/* Do not check templates at all */
- if (c->c.class == SYM_TEMPLATE)
+ if (cf->c.class == SYM_TEMPLATE)
return;
/* EBGP direct by default, IBGP multihop by default */
- if (c->multihop < 0)
- c->multihop = internal ? 64 : 0;
-
- /* Different default for gw_mode */
- if (!c->gw_mode)
- c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
-
- /* Different default based on rs_client */
- if (!c->missing_lladdr)
- c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
+ if (cf->multihop < 0)
+ cf->multihop = internal ? 64 : 0;
- /* Disable after error incompatible with restart limit action */
- if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
- c->c.in_limit->action = PLA_DISABLE;
+ /* Link check for single-hop BGP by default */
+ if (cf->check_link < 0)
+ cf->check_link = !cf->multihop;
- if (!c->local_as)
+ if (!cf->local_as)
cf_error("Local AS number must be set");
- if (ipa_zero(c->remote_ip))
+ if (ipa_zero(cf->remote_ip))
cf_error("Neighbor must be configured");
- if (!c->remote_as)
+ if (!cf->remote_as)
cf_error("Remote AS number must be set");
- if (ipa_is_link_local(c->remote_ip) && !c->iface)
+ if (ipa_is_link_local(cf->remote_ip) && !cf->iface)
cf_error("Link-local neighbor address requires specified interface");
- if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
+ if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF))
cf_error("Neighbor AS number out of range (AS4 not available)");
- if (!internal && c->rr_client)
+ if (!internal && cf->rr_client)
cf_error("Only internal neighbor can be RR client");
- if (internal && c->rs_client)
+ if (internal && cf->rs_client)
cf_error("Only external neighbor can be RS client");
- if (c->multihop && (c->gw_mode == GW_DIRECT))
- cf_error("Multihop BGP cannot use direct gateway mode");
+ if (!cf->confederation && cf->confederation_member)
+ cf_error("Confederation ID must be set for member sessions");
- if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
- ipa_is_link_local(c->source_addr)))
+ if (cf->multihop && (ipa_is_link_local(cf->local_ip) ||
+ ipa_is_link_local(cf->remote_ip)))
cf_error("Multihop BGP cannot be used with link-local addresses");
- if (c->multihop && c->iface)
+ if (cf->multihop && cf->iface)
cf_error("Multihop BGP cannot be bound to interface");
- if (c->multihop && c->check_link)
+ if (cf->multihop && cf->check_link)
cf_error("Multihop BGP cannot depend on link state");
- if (c->multihop && c->bfd && ipa_zero(c->source_addr))
- cf_error("Multihop BGP with BFD requires specified source address");
+ if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip))
+ cf_error("Multihop BGP with BFD requires specified local address");
- if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
- cf_error("BGP in recursive mode prohibits sorted table");
- if (c->deterministic_med && c->c.table->sorted)
- cf_error("BGP with deterministic MED prohibits sorted table");
+ struct bgp_channel_config *cc;
+ WALK_LIST(cc, CF->channels)
+ {
+ /* Disable after error incompatible with restart limit action */
+ if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error)
+ cc->c.in_limit.action = PLA_DISABLE;
+
+ /* Different default based on rs_client */
+ if (!cc->missing_lladdr)
+ cc->missing_lladdr = cf->rs_client ? MLL_IGNORE : MLL_SELF;
+
+ /* Different default for gw_mode */
+ if (!cc->gw_mode)
+ cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
+
+ /* Default based on proto config */
+ if (cc->gr_able == 0xff)
+ cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
+
+ /* Default values of IGP tables */
+ if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
+ {
+ if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop))
+ cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4);
+
+ if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop))
+ cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6);
- if (c->secondary && !c->c.table->sorted)
- cf_error("BGP with secondary option requires sorted table");
+ if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop)
+ cf_error("Mismatched IGP table type");
+
+ if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop)
+ cf_error("Mismatched IGP table type");
+ }
+
+ if (cf->multihop && (cc->gw_mode == GW_DIRECT))
+ cf_error("Multihop BGP cannot use direct gateway mode");
+
+ if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted)
+ cf_error("BGP in recursive mode prohibits sorted table");
+
+ if (cf->deterministic_med && cc->c.table->sorted)
+ cf_error("BGP with deterministic MED prohibits sorted table");
+
+ if (cc->secondary && !cc->c.table->sorted)
+ cf_error("BGP with secondary option requires sorted table");
+ }
}
static int
-bgp_reconfigure(struct proto *P, struct proto_config *C)
+bgp_reconfigure(struct proto *P, struct proto_config *CF)
{
- struct bgp_config *new = (struct bgp_config *) C;
- struct bgp_proto *p = (struct bgp_proto *) P;
+ struct bgp_proto *p = (void *) P;
+ struct bgp_config *new = (void *) CF;
struct bgp_config *old = p->cf;
- if (proto_get_router_id(C) != p->local_id)
+ if (proto_get_router_id(CF) != p->local_id)
return 0;
int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
@@ -1399,8 +1738,26 @@ bgp_reconfigure(struct proto *P, struct proto_config *C)
// password item is last and must be checked separately
OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
&& ((!old->password && !new->password)
- || (old->password && new->password && !strcmp(old->password, new->password)))
- && (get_igp_table(old) == get_igp_table(new));
+ || (old->password && new->password && !strcmp(old->password, new->password)));
+
+ /* FIXME: Move channel reconfiguration to generic protocol code ? */
+ struct channel *C, *C2;
+ struct bgp_channel_config *cc;
+
+ WALK_LIST(C, p->p.channels)
+ C->stale = 1;
+
+ WALK_LIST(cc, new->c.channels)
+ {
+ C = (struct channel *) bgp_find_channel(p, cc->afi);
+ same = proto_configure_channel(P, &C, &cc->c) && same;
+ C->stale = 0;
+ }
+
+ WALK_LIST_DELSAFE(C, C2, p->p.channels)
+ if (C->stale)
+ same = proto_configure_channel(P, &C, NULL) && same;
+
if (same && (p->start_state > BSS_PREPARE))
bgp_update_bfd(p, new->bfd);
@@ -1412,11 +1769,34 @@ bgp_reconfigure(struct proto *P, struct proto_config *C)
return same;
}
+#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL )
+
+static int
+bgp_channel_reconfigure(struct channel *C, struct channel_config *CC)
+{
+ struct bgp_channel *c = (void *) C;
+ struct bgp_channel_config *new = (void *) CC;
+ struct bgp_channel_config *old = c->cf;
+
+ if (memcmp(((byte *) old) + sizeof(struct channel_config),
+ ((byte *) new) + sizeof(struct channel_config),
+ /* Remaining items must be checked separately */
+ OFFSETOF(struct bgp_channel_config, rest) - sizeof(struct channel_config)))
+ return 0;
+
+ /* Check change in IGP tables */
+ if ((IGP_TABLE(old, ip4) != IGP_TABLE(new, ip4)) ||
+ (IGP_TABLE(old, ip6) != IGP_TABLE(new, ip6)))
+ return 0;
+
+ c->cf = new;
+ return 1;
+}
+
static void
-bgp_copy_config(struct proto_config *dest, struct proto_config *src)
+bgp_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED)
{
/* Just a shallow copy */
- proto_copy_rest(dest, src, sizeof(struct bgp_config));
}
@@ -1433,14 +1813,14 @@ bgp_copy_config(struct proto_config *dest, struct proto_config *src)
* closes the connection.
*/
void
-bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
+bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len)
{
struct bgp_proto *p = c->bgp;
if (c->state == BS_CLOSE)
return;
- bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
+ bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len));
bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
bgp_conn_enter_close_state(c);
@@ -1448,13 +1828,13 @@ bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int l
c->notify_subcode = subcode;
c->notify_data = data;
c->notify_size = (len > 0) ? len : 0;
- bgp_schedule_packet(c, PKT_NOTIFICATION);
+ bgp_schedule_packet(c, NULL, PKT_NOTIFICATION);
if (code != 6)
- {
- bgp_update_startup_delay(p);
- bgp_stop(p, 0, NULL, 0);
- }
+ {
+ bgp_update_startup_delay(p);
+ bgp_stop(p, 0, NULL, 0);
+ }
}
/**
@@ -1493,19 +1873,19 @@ static const char *
bgp_last_errmsg(struct bgp_proto *p)
{
switch (p->last_error_class)
- {
- case BE_MISC:
- return bgp_misc_errors[p->last_error_code];
- case BE_SOCKET:
- return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
- case BE_BGP_RX:
- case BE_BGP_TX:
- return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
- case BE_AUTO_DOWN:
- return bgp_auto_errors[p->last_error_code];
- default:
- return "";
- }
+ {
+ case BE_MISC:
+ return bgp_misc_errors[p->last_error_code];
+ case BE_SOCKET:
+ return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
+ case BE_BGP_RX:
+ case BE_BGP_TX:
+ return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
+ case BE_AUTO_DOWN:
+ return bgp_auto_errors[p->last_error_code];
+ default:
+ return "";
+ }
}
static const char *
@@ -1536,86 +1916,230 @@ bgp_get_status(struct proto *P, byte *buf)
}
static void
+bgp_show_afis(int code, char *s, u32 *afis, uint count)
+{
+ buffer b;
+ LOG_BUFFER_INIT(b);
+
+ buffer_puts(&b, s);
+
+ for (u32 *af = afis; af < (afis + count); af++)
+ {
+ const struct bgp_af_desc *desc = bgp_get_af_desc(*af);
+ if (desc)
+ buffer_print(&b, " %s", desc->name);
+ else
+ buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af));
+ }
+
+ if (b.pos == b.end)
+ strcpy(b.end - 32, " ... <too long>");
+
+ cli_msg(code, b.start);
+}
+
+static void
+bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
+{
+ struct bgp_af_caps *ac;
+ uint any_mp_bgp = 0;
+ uint any_gr_able = 0;
+ uint any_add_path = 0;
+ uint any_ext_next_hop = 0;
+ u32 *afl1 = alloca(caps->af_count * sizeof(u32));
+ u32 *afl2 = alloca(caps->af_count * sizeof(u32));
+ uint afn1, afn2;
+
+ WALK_AF_CAPS(caps, ac)
+ {
+ any_mp_bgp |= ac->ready;
+ any_gr_able |= ac->gr_able;
+ any_add_path |= ac->add_path;
+ any_ext_next_hop |= ac->ext_next_hop;
+ }
+
+ if (any_mp_bgp)
+ {
+ cli_msg(-1006, " Multiprotocol");
+
+ afn1 = 0;
+ WALK_AF_CAPS(caps, ac)
+ if (ac->ready)
+ afl1[afn1++] = ac->afi;
+
+ bgp_show_afis(-1006, " AF announced:", afl1, afn1);
+ }
+
+ if (caps->route_refresh)
+ cli_msg(-1006, " Route refresh");
+
+ if (any_ext_next_hop)
+ {
+ cli_msg(-1006, " Extended next hop");
+
+ afn1 = 0;
+ WALK_AF_CAPS(caps, ac)
+ if (ac->ext_next_hop)
+ afl1[afn1++] = ac->afi;
+
+ bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1);
+ }
+
+ if (caps->ext_messages)
+ cli_msg(-1006, " Extended message");
+
+ if (caps->gr_aware)
+ cli_msg(-1006, " Graceful restart");
+
+ if (any_gr_able)
+ {
+ /* Continues from gr_aware */
+ cli_msg(-1006, " Restart time: %u", caps->gr_time);
+ if (caps->gr_flags & BGP_GRF_RESTART)
+ cli_msg(-1006, " Restart recovery");
+
+ afn1 = afn2 = 0;
+ WALK_AF_CAPS(caps, ac)
+ {
+ if (ac->gr_able)
+ afl1[afn1++] = ac->afi;
+
+ if (ac->gr_af_flags & BGP_GRF_FORWARDING)
+ afl2[afn2++] = ac->afi;
+ }
+
+ bgp_show_afis(-1006, " AF supported:", afl1, afn1);
+ bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
+ }
+
+ if (caps->as4_support)
+ cli_msg(-1006, " 4-octet AS numbers");
+
+ if (any_add_path)
+ {
+ cli_msg(-1006, " ADD-PATH");
+
+ afn1 = afn2 = 0;
+ WALK_AF_CAPS(caps, ac)
+ {
+ if (ac->add_path & BGP_ADD_PATH_RX)
+ afl1[afn1++] = ac->afi;
+
+ if (ac->add_path & BGP_ADD_PATH_TX)
+ afl2[afn2++] = ac->afi;
+ }
+
+ bgp_show_afis(-1006, " RX:", afl1, afn1);
+ bgp_show_afis(-1006, " TX:", afl2, afn2);
+ }
+
+ if (caps->enhanced_refresh)
+ cli_msg(-1006, " Enhanced refresh");
+}
+
+static void
bgp_show_proto_info(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
- struct bgp_conn *c = p->conn;
-
- proto_show_basic_info(P);
cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
- if (p->gr_active)
+ if (p->gr_active_num)
cli_msg(-1006, " Neighbor graceful restart active");
if (P->proto_state == PS_START)
- {
- struct bgp_conn *oc = &p->outgoing_conn;
+ {
+ struct bgp_conn *oc = &p->outgoing_conn;
- if ((p->start_state < BSS_CONNECT) &&
- (p->startup_timer->expires))
- cli_msg(-1006, " Error wait: %d/%d",
- p->startup_timer->expires - now, p->startup_delay);
+ if ((p->start_state < BSS_CONNECT) &&
+ (tm_active(p->startup_timer)))
+ cli_msg(-1006, " Error wait: %t/%u",
+ tm_remains(p->startup_timer), p->startup_delay);
- if ((oc->state == BS_ACTIVE) &&
- (oc->connect_retry_timer->expires))
- cli_msg(-1006, " Connect delay: %d/%d",
- oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
+ if ((oc->state == BS_ACTIVE) &&
+ (tm_active(oc->connect_timer)))
+ cli_msg(-1006, " Connect delay: %t/%u",
+ tm_remains(oc->connect_timer), p->cf->connect_delay_time);
- if (p->gr_active && p->gr_timer->expires)
- cli_msg(-1006, " Restart timer: %d/-", p->gr_timer->expires - now);
- }
+ if (p->gr_active_num && tm_active(p->gr_timer))
+ cli_msg(-1006, " Restart timer: %t/-",
+ tm_remains(p->gr_timer));
+ }
else if (P->proto_state == PS_UP)
- {
- cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
- cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s%s",
- c->peer_refresh_support ? " refresh" : "",
- c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
- c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
- c->peer_as4_support ? " AS4" : "",
- (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
- (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
- c->peer_ext_messages_support ? " ext-messages" : "");
- cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s",
- p->is_internal ? "internal" : "external",
- p->cf->multihop ? " multihop" : "",
- p->rr_client ? " route-reflector" : "",
- p->rs_client ? " route-server" : "",
- p->as4_session ? " AS4" : "",
- p->add_path_rx ? " add-path-rx" : "",
- p->add_path_tx ? " add-path-tx" : "",
- p->ext_messages ? " ext-messages" : "");
- cli_msg(-1006, " Source address: %I", p->source_addr);
- if (P->cf->in_limit)
- cli_msg(-1006, " Route limit: %d/%d",
- p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
- cli_msg(-1006, " Hold timer: %d/%d",
- tm_remains(c->hold_timer), c->hold_time);
- cli_msg(-1006, " Keepalive timer: %d/%d",
- tm_remains(c->keepalive_timer), c->keepalive_time);
- }
+ {
+ cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
+ cli_msg(-1006, " Local capabilities");
+ bgp_show_capabilities(p, p->conn->local_caps);
+ cli_msg(-1006, " Neighbor capabilities");
+ bgp_show_capabilities(p, p->conn->remote_caps);
+ cli_msg(-1006, " Session: %s%s%s%s%s",
+ p->is_internal ? "internal" : "external",
+ p->cf->multihop ? " multihop" : "",
+ p->rr_client ? " route-reflector" : "",
+ p->rs_client ? " route-server" : "",
+ p->as4_session ? " AS4" : "");
+ cli_msg(-1006, " Source address: %I", p->source_addr);
+ cli_msg(-1006, " Hold timer: %t/%u",
+ tm_remains(p->conn->hold_timer), p->conn->hold_time);
+ cli_msg(-1006, " Keepalive timer: %t/%u",
+ tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
+ }
if ((p->last_error_class != BE_NONE) &&
(p->last_error_class != BE_MAN_DOWN))
+ {
+ const char *err1 = bgp_err_classes[p->last_error_class];
+ const char *err2 = bgp_last_errmsg(p);
+ cli_msg(-1006, " Last error: %s%s", err1, err2);
+ }
+
+ {
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
{
- const char *err1 = bgp_err_classes[p->last_error_class];
- const char *err2 = bgp_last_errmsg(p);
- cli_msg(-1006, " Last error: %s%s", err1, err2);
+ channel_show_info(&c->c);
+
+ if (c->c.channel_state == CS_UP)
+ {
+ if (ipa_zero(c->link_addr))
+ cli_msg(-1006, " BGP Next hop: %I", c->next_hop_addr);
+ else
+ cli_msg(-1006, " BGP Next hop: %I %I", c->next_hop_addr, c->link_addr);
+ }
+
+ if (c->igp_table_ip4)
+ cli_msg(-1006, " IGP IPv4 table: %s", c->igp_table_ip4->name);
+
+ if (c->igp_table_ip6)
+ cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name);
}
+ }
}
+struct channel_class channel_bgp = {
+ .channel_size = sizeof(struct bgp_channel),
+ .config_size = sizeof(struct bgp_channel_config),
+ .init = bgp_channel_init,
+ .start = bgp_channel_start,
+ .shutdown = bgp_channel_shutdown,
+ .cleanup = bgp_channel_cleanup,
+ .reconfigure = bgp_channel_reconfigure,
+};
+
struct protocol proto_bgp = {
.name = "BGP",
.template = "bgp%d",
.attr_class = EAP_BGP,
.preference = DEF_PREF_BGP,
+ .channel_mask = NB_IP | NB_VPN | NB_FLOW,
+ .proto_size = sizeof(struct bgp_proto),
.config_size = sizeof(struct bgp_config),
+ .postconfig = bgp_postconfig,
.init = bgp_init,
.start = bgp_start,
.shutdown = bgp_shutdown,
- .cleanup = bgp_cleanup,
.reconfigure = bgp_reconfigure,
.copy_config = bgp_copy_config,
.get_status = bgp_get_status,