diff options
author | Ondrej Zajicek <santiago@crfreenet.org> | 2014-03-24 12:41:43 +0100 |
---|---|---|
committer | Ondrej Zajicek <santiago@crfreenet.org> | 2014-03-24 12:41:43 +0100 |
commit | c980f8002e0f0578d5e715d48d65d9fb9a0c5a70 (patch) | |
tree | f1d3be6b0992c7cb0be3694229ffb42dc740ae91 /proto | |
parent | 2e84b4e82dbf8fce0fd12fb0c25d925ffd287970 (diff) | |
parent | 227af309e55a59f14d1a5a757f17900164bffc97 (diff) |
Merge branch 'bgp-grace'
Diffstat (limited to 'proto')
-rw-r--r-- | proto/bgp/bgp.c | 146 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 23 | ||||
-rw-r--r-- | proto/bgp/config.Y | 7 | ||||
-rw-r--r-- | proto/bgp/packets.c | 150 |
4 files changed, 304 insertions, 22 deletions
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index a748669d..ca619f31 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -51,6 +51,16 @@ * and bgp_encode_attrs() which does the converse. Both functions are built around a * @bgp_attr_table array describing all important characteristics of all known attributes. * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams. + * + * BGP protocol implements graceful restart in both restarting (local restart) + * and receiving (neighbor restart) roles. The first is handled mostly by the + * graceful restart code in the nest, BGP protocol just handles capabilities, + * sets @gr_wait and locks graceful restart until end-of-RIB mark is received. + * The second is implemented by internal restart of the BGP state to %BS_IDLE + * and protocol state to %PS_START, but keeping the protocol up from the core + * point of view and therefore maintaining received routes. Routing table + * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing + * stale routes after reestablishment of BGP session during graceful restart. */ #undef LOCAL_DEBUG @@ -319,6 +329,7 @@ bgp_decision(void *vp) DBG("BGP: Decision start\n"); if ((p->p.proto_state == PS_START) && (p->outgoing_conn.state == BS_IDLE) + && (p->incoming_conn.state != BS_OPENCONFIRM) && (!p->cf->passive)) bgp_active(p); @@ -363,7 +374,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) /* For multi-hop BGP sessions */ if (ipa_zero(p->source_addr)) - p->source_addr = conn->sk->saddr; + p->source_addr = conn->sk->saddr; p->conn = conn; p->last_error_class = 0; @@ -371,6 +382,20 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) bgp_init_bucket_table(p); bgp_init_prefix_table(p, 8); + int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART); + + if (p->p.gr_recovery && !peer_gr_ready) + proto_graceful_restart_unlock(&p->p); + + if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready) + p->p.gr_wait = 1; + + if (p->gr_active) + tm_stop(p->gr_timer); + + if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING))) + bgp_graceful_restart_done(p); + bgp_conn_set_state(conn, BS_ESTABLISHED); proto_notify_state(&p->p, PS_UP); } @@ -416,16 +441,86 @@ bgp_conn_enter_idle_state(struct bgp_conn *conn) bgp_conn_leave_established_state(p); } +/** + * bgp_handle_graceful_restart - handle detected BGP graceful restart + * @p: BGP instance + * + * This function is called when a BGP graceful restart of the neighbor is + * detected (when the TCP connection fails or when a new TCP connection + * appears). The function activates processing of the restart - starts routing + * table refresh cycle and activates BGP restart timer. The protocol state goes + * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the + * caller. + */ +void +bgp_handle_graceful_restart(struct bgp_proto *p) +{ + ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready); + + BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s", + p->gr_active ? " - already pending" : ""); + proto_notify_state(&p->p, PS_START); + + if (p->gr_active) + rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); + + p->gr_active = 1; + bgp_start_timer(p->gr_timer, p->conn->peer_gr_time); + rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook); +} + +/** + * bgp_graceful_restart_done - finish active BGP graceful restart + * @p: BGP instance + * + * This function is called when the active BGP graceful restart of the neighbor + * should be finished - either successfully (the neighbor sends all paths and + * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does + * not support BGP graceful restart on the new session). The function ends + * routing table refresh cycle and stops BGP restart timer. + */ +void +bgp_graceful_restart_done(struct bgp_proto *p) +{ + BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); + p->gr_active = 0; + tm_stop(p->gr_timer); + rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); +} + +/** + * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer' + * @t: timer + * + * This function is a timeout hook for @gr_timer, implementing BGP restart time + * limit for reestablisment of the BGP session after the graceful restart. When + * fired, we just proceed with the usual protocol restart. + */ + +static void +bgp_graceful_restart_timeout(timer *t) +{ + struct bgp_proto *p = t->data; + + BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout"); + bgp_stop(p, 0); +} + static void bgp_send_open(struct bgp_conn *conn) { conn->start_state = conn->bgp->start_state; // Default values, possibly changed by receiving capabilities. + conn->advertised_as = 0; conn->peer_refresh_support = 0; conn->peer_as4_support = 0; conn->peer_add_path = 0; - conn->advertised_as = 0; + conn->peer_gr_aware = 0; + conn->peer_gr_able = 0; + conn->peer_gr_time = 0; + conn->peer_gr_flags = 0; + conn->peer_gr_aflags = 0; DBG("BGP: Sending open\n"); conn->sk->rx_hook = bgp_rx; @@ -484,6 +579,9 @@ bgp_sock_err(sock *sk, int err) else BGP_TRACE(D_EVENTS, "Connection closed"); + if ((conn->state == BS_ESTABLISHED) && p->gr_ready) + bgp_handle_graceful_restart(p); + bgp_conn_enter_idle_state(conn); } @@ -649,6 +747,14 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED) int acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk); + if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready) + { + bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART); + bgp_handle_graceful_restart(p); + bgp_conn_enter_idle_state(p->conn); + acc = 1; + } + BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s", sk->daddr, ipa_has_link_scope(sk->daddr) ? sk->iface : NULL, sk->dport, acc ? "accepted" : "rejected"); @@ -818,6 +924,17 @@ bgp_reload_routes(struct proto *P) } static void +bgp_feed_done(struct proto *P) +{ + struct bgp_proto *p = (struct bgp_proto *) P; + if (!p->conn || !p->cf->gr_mode || p->p.refeeding) + return; + + p->send_end_mark = 1; + bgp_schedule_packet(p->conn, PKT_UPDATE); +} + +static void bgp_start_locked(struct object_lock *lock) { struct bgp_proto *p = lock->data; @@ -867,6 +984,8 @@ bgp_start(struct proto *P) p->incoming_conn.state = BS_IDLE; p->neigh = NULL; p->bfd_req = NULL; + p->gr_ready = 0; + p->gr_active = 0; rt_lock_table(p->igp_table); @@ -878,6 +997,10 @@ bgp_start(struct proto *P) p->startup_timer->hook = bgp_startup_timeout; p->startup_timer->data = p; + p->gr_timer = tm_new(p->p.pool); + p->gr_timer->hook = bgp_graceful_restart_timeout; + p->gr_timer->data = p; + p->local_id = proto_get_router_id(P->cf); if (p->rr_client) p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id; @@ -885,6 +1008,9 @@ bgp_start(struct proto *P) p->remote_id = 0; p->source_addr = p->cf->source_addr; + if (p->p.gr_recovery && p->cf->gr_mode) + proto_graceful_restart_lock(P); + /* * Before attempting to create the connection, we need to lock the * port, so that are sure we're the only instance attempting to talk @@ -985,6 +1111,7 @@ bgp_init(struct proto_config *C) P->import_control = bgp_import_control; P->neigh_notify = bgp_neigh_notify; P->reload_routes = bgp_reload_routes; + P->feed_done = bgp_feed_done; P->rte_better = bgp_rte_better; P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL; @@ -1164,7 +1291,7 @@ bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code) static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" }; static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""}; -static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down" }; +static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down", "Graceful restart"}; static char *bgp_auto_errors[] = { "", "Route limit exceeded"}; static const char * @@ -1225,25 +1352,32 @@ bgp_show_proto_info(struct proto *P) cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface); cli_msg(-1006, " Neighbor AS: %u", p->remote_as); + if (p->gr_active) + cli_msg(-1006, " Neighbor graceful restart active"); + if (P->proto_state == PS_START) { struct bgp_conn *oc = &p->outgoing_conn; if ((p->start_state < BSS_CONNECT) && (p->startup_timer->expires)) - cli_msg(-1006, " Error wait: %d/%d", + cli_msg(-1006, " Error wait: %d/%d", p->startup_timer->expires - now, p->startup_delay); if ((oc->state == BS_ACTIVE) && (oc->connect_retry_timer->expires)) - cli_msg(-1006, " Start delay: %d/%d", + cli_msg(-1006, " Start delay: %d/%d", oc->connect_retry_timer->expires - now, p->cf->start_delay_time); + + if (p->gr_active && p->gr_timer->expires) + cli_msg(-1006, " Restart timer: %d/-", p->gr_timer->expires - now); } else if (P->proto_state == PS_UP) { cli_msg(-1006, " Neighbor ID: %R", p->remote_id); - cli_msg(-1006, " Neighbor caps: %s%s%s%s", + cli_msg(-1006, " Neighbor caps: %s%s%s%s%s", c->peer_refresh_support ? " refresh" : "", + c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""), c->peer_as4_support ? " AS4" : "", (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "", (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : ""); diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 170b6bbe..da0114c2 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -48,6 +48,8 @@ struct bgp_config { int secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */ int add_path; /* Use ADD-PATH extension [draft] */ int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */ + int gr_mode; /* Graceful restart mode (BGP_GR_*) */ + unsigned gr_time; /* Graceful restart timeout */ unsigned connect_retry_time; unsigned hold_time, initial_hold_time; unsigned keepalive_time; @@ -73,6 +75,15 @@ struct bgp_config { #define ADD_PATH_TX 2 #define ADD_PATH_FULL 3 +#define BGP_GR_ABLE 1 +#define BGP_GR_AWARE 2 + +/* For peer_gr_flags */ +#define BGP_GRF_RESTART 0x80 + +/* For peer_gr_aflags */ +#define BGP_GRF_FORWARDING 0x80 + struct bgp_conn { struct bgp_proto *bgp; @@ -90,6 +101,11 @@ struct bgp_conn { u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */ u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */ u8 peer_add_path; /* Peer supports ADD-PATH [draft] */ + u8 peer_gr_aware; + u8 peer_gr_able; + u16 peer_gr_time; + u8 peer_gr_flags; + u8 peer_gr_aflags; unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; @@ -107,6 +123,8 @@ struct bgp_proto { u32 rr_cluster_id; /* Route reflector cluster ID */ int rr_client; /* Whether neighbor is RR client of me */ int rs_client; /* Whether neighbor is RS client of me */ + u8 gr_ready; /* Neighbor could do graceful restart */ + u8 gr_active; /* Neighbor is doing graceful restart */ struct bgp_conn *conn; /* Connection we have established */ struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */ struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ @@ -117,12 +135,14 @@ struct bgp_proto { rtable *igp_table; /* Table used for recursive next hop lookups */ struct event *event; /* Event for respawning and shutting process */ struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ + struct timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */ struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */ unsigned int hash_size, hash_count, hash_limit; HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ slab *prefix_slab; /* Slab holding prefix nodes */ list bucket_queue; /* Queue of buckets to send */ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ + unsigned send_end_mark; /* End-of-RIB mark scheduled for transmit */ unsigned startup_delay; /* Time to delay protocol startup by due to errors */ bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */ u8 last_error_class; /* Error class of last error */ @@ -172,6 +192,8 @@ void bgp_conn_enter_openconfirm_state(struct bgp_conn *conn); void bgp_conn_enter_established_state(struct bgp_conn *conn); void bgp_conn_enter_close_state(struct bgp_conn *conn); void bgp_conn_enter_idle_state(struct bgp_conn *conn); +void bgp_handle_graceful_restart(struct bgp_proto *p); +void bgp_graceful_restart_done(struct bgp_proto *p); void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); void bgp_stop(struct bgp_proto *p, unsigned subcode); @@ -313,6 +335,7 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */ #define BEM_NO_SOCKET 4 #define BEM_BFD_DOWN 5 +#define BEM_GRACEFUL_RESTART 6 /* Automatic shutdown error codes */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 76a76470..6b885032 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -26,7 +26,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, - SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX) + SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE) CF_GRAMMAR @@ -50,6 +50,8 @@ bgp_proto_start: proto_start BGP { BGP_CFG->advertise_ipv4 = 1; BGP_CFG->interpret_communities = 1; BGP_CFG->default_local_pref = 100; + BGP_CFG->gr_mode = BGP_GR_AWARE; + BGP_CFG->gr_time = 120; } ; @@ -115,6 +117,9 @@ bgp_proto: | bgp_proto ADD PATHS bool ';' { BGP_CFG->add_path = $4 ? ADD_PATH_FULL : 0; } | bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; } | bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; } + | bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; } + | bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; } + | bgp_proto GRACEFUL RESTART TIME expr ';' { BGP_CFG->gr_time = $5; } | bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; } | bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; } | bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 649d8078..2d4da8c9 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -122,7 +122,7 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf) #ifdef IPV6 static byte * -bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf) +bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf) { *buf++ = 1; /* Capability 1: Multiprotocol extensions */ *buf++ = 4; /* Capability data length */ @@ -136,7 +136,7 @@ bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf) #else static byte * -bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf) +bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf) { *buf++ = 1; /* Capability 1: Multiprotocol extensions */ *buf++ = 4; /* Capability data length */ @@ -149,7 +149,7 @@ bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf) #endif static byte * -bgp_put_cap_rr(struct bgp_conn *conn UNUSED, byte *buf) +bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf) { *buf++ = 2; /* Capability 2: Support for route refresh */ *buf++ = 0; /* Capability data length */ @@ -157,16 +157,44 @@ bgp_put_cap_rr(struct bgp_conn *conn UNUSED, byte *buf) } static byte * -bgp_put_cap_as4(struct bgp_conn *conn, byte *buf) +bgp_put_cap_gr1(struct bgp_proto *p, byte *buf) +{ + *buf++ = 64; /* Capability 64: Support for graceful restart */ + *buf++ = 6; /* Capability data length */ + + put_u16(buf, p->cf->gr_time); + if (p->p.gr_recovery) + buf[0] |= BGP_GRF_RESTART; + buf += 2; + + *buf++ = 0; /* Appropriate AF */ + *buf++ = BGP_AF; + *buf++ = 1; /* and SAFI 1 */ + *buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0; + + return buf; +} + +static byte * +bgp_put_cap_gr2(struct bgp_proto *p, byte *buf) +{ + *buf++ = 64; /* Capability 64: Support for graceful restart */ + *buf++ = 2; /* Capability data length */ + put_u16(buf, 0); + return buf + 2; +} + +static byte * +bgp_put_cap_as4(struct bgp_proto *p, byte *buf) { *buf++ = 65; /* Capability 65: Support for 4-octet AS number */ *buf++ = 4; /* Capability data length */ - put_u32(buf, conn->bgp->local_as); + put_u32(buf, p->local_as); return buf + 4; } static byte * -bgp_put_cap_add_path(struct bgp_conn *conn, byte *buf) +bgp_put_cap_add_path(struct bgp_proto *p, byte *buf) { *buf++ = 69; /* Capability 69: Support for ADD-PATH */ *buf++ = 4; /* Capability data length */ @@ -175,7 +203,7 @@ bgp_put_cap_add_path(struct bgp_conn *conn, byte *buf) *buf++ = BGP_AF; *buf++ = 1; /* SAFI 1 */ - *buf++ = conn->bgp->cf->add_path; + *buf++ = p->cf->add_path; return buf; } @@ -206,21 +234,26 @@ bgp_create_open(struct bgp_conn *conn, byte *buf) #ifndef IPV6 if (p->cf->advertise_ipv4) - cap = bgp_put_cap_ipv4(conn, cap); + cap = bgp_put_cap_ipv4(p, cap); #endif #ifdef IPV6 - cap = bgp_put_cap_ipv6(conn, cap); + cap = bgp_put_cap_ipv6(p, cap); #endif if (p->cf->enable_refresh) - cap = bgp_put_cap_rr(conn, cap); + cap = bgp_put_cap_rr(p, cap); + + if (p->cf->gr_mode == BGP_GR_ABLE) + cap = bgp_put_cap_gr1(p, cap); + else if (p->cf->gr_mode == BGP_GR_AWARE) + cap = bgp_put_cap_gr2(p, cap); if (p->cf->enable_as4) - cap = bgp_put_cap_as4(conn, cap); + cap = bgp_put_cap_as4(p, cap); if (p->cf->add_path) - cap = bgp_put_cap_add_path(conn, cap); + cap = bgp_put_cap_add_path(p, cap); cap_len = cap - buf - 12; if (cap_len > 0) @@ -351,6 +384,16 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) return NULL; } +static byte * +bgp_create_end_mark(struct bgp_conn *conn, byte *buf) +{ + struct bgp_proto *p = conn->bgp; + BGP_TRACE(D_PACKETS, "Sending End-of-RIB"); + + put_u32(buf, 0); + return buf+4; +} + #else /* IPv6 version */ static inline int @@ -520,6 +563,26 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) return NULL; } +static byte * +bgp_create_end_mark(struct bgp_conn *conn, byte *buf) +{ + struct bgp_proto *p = conn->bgp; + BGP_TRACE(D_PACKETS, "Sending End-of-RIB"); + + put_u16(buf+0, 0); + put_u16(buf+2, 6); /* length 4-9 */ + buf += 4; + + /* Empty MP_UNREACH_NLRI atribute */ + *buf++ = BAF_OPTIONAL; + *buf++ = BA_MP_UNREACH_NLRI; + *buf++ = 3; /* Length 7-9 */ + *buf++ = 0; /* AFI */ + *buf++ = BGP_AF_IPV6; + *buf++ = 1; /* SAFI */ + return buf; +} + #endif static byte * @@ -606,10 +669,16 @@ bgp_fire_tx(struct bgp_conn *conn) { end = bgp_create_update(conn, pkt); type = PKT_UPDATE; + if (!end) { conn->packets_to_send = 0; - return 0; + + if (!p->send_end_mark) + return 0; + + p->send_end_mark = 0; + end = bgp_create_end_mark(conn, pkt); } } else @@ -678,6 +747,22 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) conn->peer_refresh_support = 1; break; + case 64: /* Graceful restart capability, RFC 4724 */ + if (cl % 4 != 2) + goto err; + conn->peer_gr_aware = 1; + conn->peer_gr_able = 0; + conn->peer_gr_time = get_u16(opt + 2) & 0x0fff; + conn->peer_gr_flags = opt[2] & 0xf0; + conn->peer_gr_aflags = 0; + for (i = 2; i < cl; i += 4) + if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */ + { + conn->peer_gr_able = 1; + conn->peer_gr_aflags = opt[2+i+3]; + } + break; + case 65: /* AS4 capability, RFC 4893 */ if (cl != 4) goto err; @@ -704,7 +789,7 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) } return; - err: + err: bgp_error(conn, 2, 0, NULL, 0); return; } @@ -807,12 +892,17 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn; switch (other->state) { - case BS_IDLE: case BS_CONNECT: case BS_ACTIVE: + /* Stop outgoing connection attempts */ + bgp_conn_enter_idle_state(other); + break; + + case BS_IDLE: case BS_OPENSENT: case BS_CLOSE: break; + case BS_OPENCONFIRM: if ((p->local_id < id) == (conn == &p->incoming_conn)) { @@ -838,6 +928,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) p->as4_session = p->cf->enable_as4 && conn->peer_as4_support; p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX); p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX); + p->gr_ready = p->cf->gr_mode && conn->peer_gr_able; if (p->add_path_tx) p->p.accept_ra_types = RA_ANY; @@ -849,6 +940,20 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) bgp_conn_enter_openconfirm_state(conn); } + +static inline void +bgp_rx_end_mark(struct bgp_proto *p) +{ + BGP_TRACE(D_PACKETS, "Got End-of-RIB"); + + if (p->p.gr_recovery) + proto_graceful_restart_unlock(&p->p); + + if (p->gr_active) + bgp_graceful_restart_done(p); +} + + #define DECODE_PREFIX(pp, ll) do { \ if (p->add_path_rx) \ { \ @@ -983,6 +1088,13 @@ bgp_do_rx_update(struct bgp_conn *conn, u32 path_id = 0; u32 last_id = 0; + /* Check for End-of-RIB marker */ + if (!withdrawn_len && !attr_len && !nlri_len) + { + bgp_rx_end_mark(p); + return; + } + /* Withdraw routes */ while (withdrawn_len) { @@ -1088,6 +1200,14 @@ bgp_do_rx_update(struct bgp_conn *conn, if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */ return; + /* Check for End-of-RIB marker */ + if ((attr_len < 8) && !withdrawn_len && !attr_len && + (p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == BGP_AF_IPV6)) + { + bgp_rx_end_mark(p); + return; + } + DO_NLRI(mp_unreach) { while (len) |