summaryrefslogtreecommitdiff
path: root/proto
diff options
context:
space:
mode:
Diffstat (limited to 'proto')
-rw-r--r--proto/bgp/bgp.c146
-rw-r--r--proto/bgp/bgp.h23
-rw-r--r--proto/bgp/config.Y7
-rw-r--r--proto/bgp/packets.c150
4 files changed, 304 insertions, 22 deletions
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index a748669d..ca619f31 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -51,6 +51,16 @@
* and bgp_encode_attrs() which does the converse. Both functions are built around a
* @bgp_attr_table array describing all important characteristics of all known attributes.
* Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
+ *
+ * BGP protocol implements graceful restart in both restarting (local restart)
+ * and receiving (neighbor restart) roles. The first is handled mostly by the
+ * graceful restart code in the nest, BGP protocol just handles capabilities,
+ * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
+ * The second is implemented by internal restart of the BGP state to %BS_IDLE
+ * and protocol state to %PS_START, but keeping the protocol up from the core
+ * point of view and therefore maintaining received routes. Routing table
+ * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
+ * stale routes after reestablishment of BGP session during graceful restart.
*/
#undef LOCAL_DEBUG
@@ -319,6 +329,7 @@ bgp_decision(void *vp)
DBG("BGP: Decision start\n");
if ((p->p.proto_state == PS_START)
&& (p->outgoing_conn.state == BS_IDLE)
+ && (p->incoming_conn.state != BS_OPENCONFIRM)
&& (!p->cf->passive))
bgp_active(p);
@@ -363,7 +374,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
/* For multi-hop BGP sessions */
if (ipa_zero(p->source_addr))
- p->source_addr = conn->sk->saddr;
+ p->source_addr = conn->sk->saddr;
p->conn = conn;
p->last_error_class = 0;
@@ -371,6 +382,20 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
bgp_init_bucket_table(p);
bgp_init_prefix_table(p, 8);
+ int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
+
+ if (p->p.gr_recovery && !peer_gr_ready)
+ proto_graceful_restart_unlock(&p->p);
+
+ if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
+ p->p.gr_wait = 1;
+
+ if (p->gr_active)
+ tm_stop(p->gr_timer);
+
+ if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
+ bgp_graceful_restart_done(p);
+
bgp_conn_set_state(conn, BS_ESTABLISHED);
proto_notify_state(&p->p, PS_UP);
}
@@ -416,16 +441,86 @@ bgp_conn_enter_idle_state(struct bgp_conn *conn)
bgp_conn_leave_established_state(p);
}
+/**
+ * bgp_handle_graceful_restart - handle detected BGP graceful restart
+ * @p: BGP instance
+ *
+ * This function is called when a BGP graceful restart of the neighbor is
+ * detected (when the TCP connection fails or when a new TCP connection
+ * appears). The function activates processing of the restart - starts routing
+ * table refresh cycle and activates BGP restart timer. The protocol state goes
+ * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
+ * caller.
+ */
+void
+bgp_handle_graceful_restart(struct bgp_proto *p)
+{
+ ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready); /* callers must check for an established, GR-ready session first */
+
+ BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
+ p->gr_active ? " - already pending" : "");
+ proto_notify_state(&p->p, PS_START);
+
+ if (p->gr_active) /* an earlier restart is still pending: close its refresh cycle before opening a new one */
+ rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
+
+ p->gr_active = 1;
+ bgp_start_timer(p->gr_timer, p->conn->peer_gr_time); /* restart time taken from the neighbor's GR capability */
+ rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
+}
+
+/**
+ * bgp_graceful_restart_done - finish active BGP graceful restart
+ * @p: BGP instance
+ *
+ * This function is called when the active BGP graceful restart of the neighbor
+ * should be finished - either successfully (the neighbor sends all paths and
+ * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
+ * not support BGP graceful restart on the new session). The function ends
+ * routing table refresh cycle and stops BGP restart timer.
+ */
+void
+bgp_graceful_restart_done(struct bgp_proto *p)
+{
+ BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
+ p->gr_active = 0;
+ tm_stop(p->gr_timer); /* the restart time limit no longer applies */
+ rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); /* closes the cycle begun in bgp_handle_graceful_restart() */
+}
+
+/**
+ * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
+ * @t: timer
+ *
+ * This function is a timeout hook for @gr_timer, implementing BGP restart time
+ * limit for reestablishment of the BGP session after the graceful restart. When
+ * fired, we just proceed with the usual protocol restart.
+ */
+
+static void
+bgp_graceful_restart_timeout(timer *t)
+{
+ struct bgp_proto *p = t->data; /* set to the owning BGP instance in bgp_start() */
+
+ BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
+ bgp_stop(p, 0); /* session was not reestablished in time; stop with subcode 0 */
+}
+
static void
bgp_send_open(struct bgp_conn *conn)
{
conn->start_state = conn->bgp->start_state;
// Default values, possibly changed by receiving capabilities.
+ conn->advertised_as = 0;
conn->peer_refresh_support = 0;
conn->peer_as4_support = 0;
conn->peer_add_path = 0;
- conn->advertised_as = 0;
+ conn->peer_gr_aware = 0;
+ conn->peer_gr_able = 0;
+ conn->peer_gr_time = 0;
+ conn->peer_gr_flags = 0;
+ conn->peer_gr_aflags = 0;
DBG("BGP: Sending open\n");
conn->sk->rx_hook = bgp_rx;
@@ -484,6 +579,9 @@ bgp_sock_err(sock *sk, int err)
else
BGP_TRACE(D_EVENTS, "Connection closed");
+ if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
+ bgp_handle_graceful_restart(p);
+
bgp_conn_enter_idle_state(conn);
}
@@ -649,6 +747,14 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED)
int acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
(p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
+ if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
+ {
+ bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
+ bgp_handle_graceful_restart(p);
+ bgp_conn_enter_idle_state(p->conn);
+ acc = 1;
+ }
+
BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
sk->daddr, ipa_has_link_scope(sk->daddr) ? sk->iface : NULL,
sk->dport, acc ? "accepted" : "rejected");
@@ -818,6 +924,17 @@ bgp_reload_routes(struct proto *P)
}
static void
+bgp_feed_done(struct proto *P) /* feed_done hook: schedule the End-of-RIB mark for the established session */
+{
+ struct bgp_proto *p = (struct bgp_proto *) P;
+ if (!p->conn || !p->cf->gr_mode || p->p.refeeding) /* no established connection, graceful restart disabled, or (presumably) a refeed rather than the initial feed - TODO confirm refeeding semantics */
+ return;
+
+ p->send_end_mark = 1; /* consumed by bgp_fire_tx() once no more UPDATEs remain to be sent */
+ bgp_schedule_packet(p->conn, PKT_UPDATE);
+}
+
+static void
bgp_start_locked(struct object_lock *lock)
{
struct bgp_proto *p = lock->data;
@@ -867,6 +984,8 @@ bgp_start(struct proto *P)
p->incoming_conn.state = BS_IDLE;
p->neigh = NULL;
p->bfd_req = NULL;
+ p->gr_ready = 0;
+ p->gr_active = 0;
rt_lock_table(p->igp_table);
@@ -878,6 +997,10 @@ bgp_start(struct proto *P)
p->startup_timer->hook = bgp_startup_timeout;
p->startup_timer->data = p;
+ p->gr_timer = tm_new(p->p.pool);
+ p->gr_timer->hook = bgp_graceful_restart_timeout;
+ p->gr_timer->data = p;
+
p->local_id = proto_get_router_id(P->cf);
if (p->rr_client)
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
@@ -885,6 +1008,9 @@ bgp_start(struct proto *P)
p->remote_id = 0;
p->source_addr = p->cf->source_addr;
+ if (p->p.gr_recovery && p->cf->gr_mode)
+ proto_graceful_restart_lock(P);
+
/*
* Before attempting to create the connection, we need to lock the
* port, so that are sure we're the only instance attempting to talk
@@ -985,6 +1111,7 @@ bgp_init(struct proto_config *C)
P->import_control = bgp_import_control;
P->neigh_notify = bgp_neigh_notify;
P->reload_routes = bgp_reload_routes;
+ P->feed_done = bgp_feed_done;
P->rte_better = bgp_rte_better;
P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
@@ -1164,7 +1291,7 @@ bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
-static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down" };
+static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down", "Graceful restart"};
static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
static const char *
@@ -1225,25 +1352,32 @@ bgp_show_proto_info(struct proto *P)
cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
+ if (p->gr_active)
+ cli_msg(-1006, " Neighbor graceful restart active");
+
if (P->proto_state == PS_START)
{
struct bgp_conn *oc = &p->outgoing_conn;
if ((p->start_state < BSS_CONNECT) &&
(p->startup_timer->expires))
- cli_msg(-1006, " Error wait: %d/%d",
+ cli_msg(-1006, " Error wait: %d/%d",
p->startup_timer->expires - now, p->startup_delay);
if ((oc->state == BS_ACTIVE) &&
(oc->connect_retry_timer->expires))
- cli_msg(-1006, " Start delay: %d/%d",
+ cli_msg(-1006, " Start delay: %d/%d",
oc->connect_retry_timer->expires - now, p->cf->start_delay_time);
+
+ if (p->gr_active && p->gr_timer->expires)
+ cli_msg(-1006, " Restart timer: %d/-", p->gr_timer->expires - now);
}
else if (P->proto_state == PS_UP)
{
cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
- cli_msg(-1006, " Neighbor caps: %s%s%s%s",
+ cli_msg(-1006, " Neighbor caps: %s%s%s%s%s",
c->peer_refresh_support ? " refresh" : "",
+ c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
c->peer_as4_support ? " AS4" : "",
(c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
(c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "");
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 170b6bbe..da0114c2 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -48,6 +48,8 @@ struct bgp_config {
int secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
int add_path; /* Use ADD-PATH extension [draft] */
int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */
+ int gr_mode; /* Graceful restart mode (BGP_GR_*) */
+ unsigned gr_time; /* Graceful restart timeout */
unsigned connect_retry_time;
unsigned hold_time, initial_hold_time;
unsigned keepalive_time;
@@ -73,6 +75,15 @@ struct bgp_config {
#define ADD_PATH_TX 2
#define ADD_PATH_FULL 3
+#define BGP_GR_ABLE 1
+#define BGP_GR_AWARE 2
+
+/* For peer_gr_flags */
+#define BGP_GRF_RESTART 0x80
+
+/* For peer_gr_aflags */
+#define BGP_GRF_FORWARDING 0x80
+
struct bgp_conn {
struct bgp_proto *bgp;
@@ -90,6 +101,11 @@ struct bgp_conn {
u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */
u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */
u8 peer_add_path; /* Peer supports ADD-PATH [draft] */
+ u8 peer_gr_aware;
+ u8 peer_gr_able;
+ u16 peer_gr_time;
+ u8 peer_gr_flags;
+ u8 peer_gr_aflags;
unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
};
@@ -107,6 +123,8 @@ struct bgp_proto {
u32 rr_cluster_id; /* Route reflector cluster ID */
int rr_client; /* Whether neighbor is RR client of me */
int rs_client; /* Whether neighbor is RS client of me */
+ u8 gr_ready; /* Neighbor could do graceful restart */
+ u8 gr_active; /* Neighbor is doing graceful restart */
struct bgp_conn *conn; /* Connection we have established */
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
@@ -117,12 +135,14 @@ struct bgp_proto {
rtable *igp_table; /* Table used for recursive next hop lookups */
struct event *event; /* Event for respawning and shutting process */
struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
+ struct timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */
struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */
unsigned int hash_size, hash_count, hash_limit;
HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
slab *prefix_slab; /* Slab holding prefix nodes */
list bucket_queue; /* Queue of buckets to send */
struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
+ unsigned send_end_mark; /* End-of-RIB mark scheduled for transmit */
unsigned startup_delay; /* Time to delay protocol startup by due to errors */
bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */
u8 last_error_class; /* Error class of last error */
@@ -172,6 +192,8 @@ void bgp_conn_enter_openconfirm_state(struct bgp_conn *conn);
void bgp_conn_enter_established_state(struct bgp_conn *conn);
void bgp_conn_enter_close_state(struct bgp_conn *conn);
void bgp_conn_enter_idle_state(struct bgp_conn *conn);
+void bgp_handle_graceful_restart(struct bgp_proto *p);
+void bgp_graceful_restart_done(struct bgp_proto *p);
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
void bgp_stop(struct bgp_proto *p, unsigned subcode);
@@ -313,6 +335,7 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
#define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */
#define BEM_NO_SOCKET 4
#define BEM_BFD_DOWN 5
+#define BEM_GRACEFUL_RESTART 6
/* Automatic shutdown error codes */
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 76a76470..6b885032 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -26,7 +26,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY,
PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH,
INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP,
TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC,
- SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX)
+ SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE)
CF_GRAMMAR
@@ -50,6 +50,8 @@ bgp_proto_start: proto_start BGP {
BGP_CFG->advertise_ipv4 = 1;
BGP_CFG->interpret_communities = 1;
BGP_CFG->default_local_pref = 100;
+ BGP_CFG->gr_mode = BGP_GR_AWARE;
+ BGP_CFG->gr_time = 120;
}
;
@@ -115,6 +117,9 @@ bgp_proto:
| bgp_proto ADD PATHS bool ';' { BGP_CFG->add_path = $4 ? ADD_PATH_FULL : 0; }
| bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; }
| bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; }
+ | bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; }
+ | bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; }
+ | bgp_proto GRACEFUL RESTART TIME expr ';' { BGP_CFG->gr_time = $5; }
| bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; }
| bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; }
| bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); }
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 649d8078..2d4da8c9 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -122,7 +122,7 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf)
#ifdef IPV6
static byte *
-bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
+bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf)
{
*buf++ = 1; /* Capability 1: Multiprotocol extensions */
*buf++ = 4; /* Capability data length */
@@ -136,7 +136,7 @@ bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
#else
static byte *
-bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
+bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf)
{
*buf++ = 1; /* Capability 1: Multiprotocol extensions */
*buf++ = 4; /* Capability data length */
@@ -149,7 +149,7 @@ bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
#endif
static byte *
-bgp_put_cap_rr(struct bgp_conn *conn UNUSED, byte *buf)
+bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf)
{
*buf++ = 2; /* Capability 2: Support for route refresh */
*buf++ = 0; /* Capability data length */
@@ -157,16 +157,44 @@ bgp_put_cap_rr(struct bgp_conn *conn UNUSED, byte *buf)
}
static byte *
-bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
+bgp_put_cap_gr1(struct bgp_proto *p, byte *buf) /* writes the graceful restart capability with one AFI/SAFI entry; returns the advanced buffer pointer */
+{
+ *buf++ = 64; /* Capability 64: Support for graceful restart */
+ *buf++ = 6; /* Capability data length */
+
+ put_u16(buf, p->cf->gr_time); /* restart time; the receiving side reads the low 12 bits as time and the top 4 bits as flags */
+ if (p->p.gr_recovery)
+ buf[0] |= BGP_GRF_RESTART; /* restart flag shares the first byte with the high bits of the time */
+ buf += 2;
+
+ *buf++ = 0; /* Appropriate AF */
+ *buf++ = BGP_AF;
+ *buf++ = 1; /* and SAFI 1 */
+ *buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0; /* per-AFI/SAFI flags byte */
+
+ return buf;
+}
+
+static byte *
+bgp_put_cap_gr2(struct bgp_proto *p UNUSED, byte *buf) /* writes the graceful restart capability without any AFI/SAFI entry; @p is not needed */
+{
+ *buf++ = 64; /* Capability 64: Support for graceful restart */
+ *buf++ = 2; /* Capability data length */
+ put_u16(buf, 0); /* zero restart time and flags, no AFI/SAFI entries follow */
+ return buf + 2; /* advanced buffer pointer, past the 2-byte field */
+}
+
+static byte *
+bgp_put_cap_as4(struct bgp_proto *p, byte *buf)
{
*buf++ = 65; /* Capability 65: Support for 4-octet AS number */
*buf++ = 4; /* Capability data length */
- put_u32(buf, conn->bgp->local_as);
+ put_u32(buf, p->local_as);
return buf + 4;
}
static byte *
-bgp_put_cap_add_path(struct bgp_conn *conn, byte *buf)
+bgp_put_cap_add_path(struct bgp_proto *p, byte *buf)
{
*buf++ = 69; /* Capability 69: Support for ADD-PATH */
*buf++ = 4; /* Capability data length */
@@ -175,7 +203,7 @@ bgp_put_cap_add_path(struct bgp_conn *conn, byte *buf)
*buf++ = BGP_AF;
*buf++ = 1; /* SAFI 1 */
- *buf++ = conn->bgp->cf->add_path;
+ *buf++ = p->cf->add_path;
return buf;
}
@@ -206,21 +234,26 @@ bgp_create_open(struct bgp_conn *conn, byte *buf)
#ifndef IPV6
if (p->cf->advertise_ipv4)
- cap = bgp_put_cap_ipv4(conn, cap);
+ cap = bgp_put_cap_ipv4(p, cap);
#endif
#ifdef IPV6
- cap = bgp_put_cap_ipv6(conn, cap);
+ cap = bgp_put_cap_ipv6(p, cap);
#endif
if (p->cf->enable_refresh)
- cap = bgp_put_cap_rr(conn, cap);
+ cap = bgp_put_cap_rr(p, cap);
+
+ if (p->cf->gr_mode == BGP_GR_ABLE)
+ cap = bgp_put_cap_gr1(p, cap);
+ else if (p->cf->gr_mode == BGP_GR_AWARE)
+ cap = bgp_put_cap_gr2(p, cap);
if (p->cf->enable_as4)
- cap = bgp_put_cap_as4(conn, cap);
+ cap = bgp_put_cap_as4(p, cap);
if (p->cf->add_path)
- cap = bgp_put_cap_add_path(conn, cap);
+ cap = bgp_put_cap_add_path(p, cap);
cap_len = cap - buf - 12;
if (cap_len > 0)
@@ -351,6 +384,16 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
return NULL;
}
+static byte *
+bgp_create_end_mark(struct bgp_conn *conn, byte *buf) /* IPv4 build: writes the End-of-RIB UPDATE body and returns the pointer past it */
+{
+ struct bgp_proto *p = conn->bgp; /* presumably referenced by the BGP_TRACE macro - TODO confirm */
+ BGP_TRACE(D_PACKETS, "Sending End-of-RIB");
+
+ put_u32(buf, 0); /* four zero bytes: both 16-bit length fields are zero (no withdrawn routes, no attributes) */
+ return buf+4;
+}
+
#else /* IPv6 version */
static inline int
@@ -520,6 +563,26 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
return NULL;
}
+static byte *
+bgp_create_end_mark(struct bgp_conn *conn, byte *buf) /* IPv6 build: writes the End-of-RIB UPDATE body and returns the pointer past it */
+{
+ struct bgp_proto *p = conn->bgp; /* presumably referenced by the BGP_TRACE macro - TODO confirm */
+ BGP_TRACE(D_PACKETS, "Sending End-of-RIB");
+
+ put_u16(buf+0, 0); /* no withdrawn routes */
+ put_u16(buf+2, 6); /* length 4-9 */
+ buf += 4;
+
+ /* Empty MP_UNREACH_NLRI attribute */
+ *buf++ = BAF_OPTIONAL;
+ *buf++ = BA_MP_UNREACH_NLRI;
+ *buf++ = 3; /* Length 7-9 */
+ *buf++ = 0; /* AFI */
+ *buf++ = BGP_AF_IPV6;
+ *buf++ = 1; /* SAFI */
+ return buf;
+}
+
#endif
static byte *
@@ -606,10 +669,16 @@ bgp_fire_tx(struct bgp_conn *conn)
{
end = bgp_create_update(conn, pkt);
type = PKT_UPDATE;
+
if (!end)
{
conn->packets_to_send = 0;
- return 0;
+
+ if (!p->send_end_mark)
+ return 0;
+
+ p->send_end_mark = 0;
+ end = bgp_create_end_mark(conn, pkt);
}
}
else
@@ -678,6 +747,22 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
conn->peer_refresh_support = 1;
break;
+ case 64: /* Graceful restart capability, RFC 4724 */
+ if (cl % 4 != 2)
+ goto err;
+ conn->peer_gr_aware = 1;
+ conn->peer_gr_able = 0;
+ conn->peer_gr_time = get_u16(opt + 2) & 0x0fff;
+ conn->peer_gr_flags = opt[2] & 0xf0;
+ conn->peer_gr_aflags = 0;
+ for (i = 2; i < cl; i += 4)
+ if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
+ {
+ conn->peer_gr_able = 1;
+ conn->peer_gr_aflags = opt[2+i+3];
+ }
+ break;
+
case 65: /* AS4 capability, RFC 4893 */
if (cl != 4)
goto err;
@@ -704,7 +789,7 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
}
return;
- err:
+ err:
bgp_error(conn, 2, 0, NULL, 0);
return;
}
@@ -807,12 +892,17 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
switch (other->state)
{
- case BS_IDLE:
case BS_CONNECT:
case BS_ACTIVE:
+ /* Stop outgoing connection attempts */
+ bgp_conn_enter_idle_state(other);
+ break;
+
+ case BS_IDLE:
case BS_OPENSENT:
case BS_CLOSE:
break;
+
case BS_OPENCONFIRM:
if ((p->local_id < id) == (conn == &p->incoming_conn))
{
@@ -838,6 +928,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
+ p->gr_ready = p->cf->gr_mode && conn->peer_gr_able;
if (p->add_path_tx)
p->p.accept_ra_types = RA_ANY;
@@ -849,6 +940,20 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
bgp_conn_enter_openconfirm_state(conn);
}
+
+static inline void
+bgp_rx_end_mark(struct bgp_proto *p) /* called from bgp_do_rx_update() when the received UPDATE is an End-of-RIB marker */
+{
+ BGP_TRACE(D_PACKETS, "Got End-of-RIB");
+
+ if (p->p.gr_recovery) /* we are the restarting side: release the graceful restart lock */
+ proto_graceful_restart_unlock(&p->p);
+
+ if (p->gr_active) /* the neighbor was restarting: it has resent its paths, finish the restart */
+ bgp_graceful_restart_done(p);
+}
+
+
#define DECODE_PREFIX(pp, ll) do { \
if (p->add_path_rx) \
{ \
@@ -983,6 +1088,13 @@ bgp_do_rx_update(struct bgp_conn *conn,
u32 path_id = 0;
u32 last_id = 0;
+ /* Check for End-of-RIB marker */
+ if (!withdrawn_len && !attr_len && !nlri_len)
+ {
+ bgp_rx_end_mark(p);
+ return;
+ }
+
/* Withdraw routes */
while (withdrawn_len)
{
@@ -1088,6 +1200,14 @@ bgp_do_rx_update(struct bgp_conn *conn,
if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
return;
+ /* Check for End-of-RIB marker (attr_len is nonzero here: it covers the empty MP_UNREACH_NLRI itself; < 8 leaves no room for any other attribute) */
+ if ((attr_len < 8) && !withdrawn_len &&
+ (p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == BGP_AF_IPV6))
+ {
+ bgp_rx_end_mark(p);
+ return;
+ }
+
DO_NLRI(mp_unreach)
{
while (len)