diff options
| field | value | date |
|---|---|---|
| author | Ondrej Zajicek (work) <santiago@crfreenet.org> | 2018-07-31 18:40:38 +0200 |
| committer | Ondrej Zajicek (work) <santiago@crfreenet.org> | 2018-07-31 18:40:38 +0200 |
| commit | 5bd734317c05008a66eefaa14fc98a6d533cf9ef (patch) | |
| tree | afe7e898b8eca7916b52c71eaa0b133c2809f03c /proto | |
| parent | 318acb0f6cb77a32aad5d7f79e06f3c5065ac702 (diff) | |
BGP: Long-lived graceful restart
The patch implements long-lived graceful restart for BGP, namely
draft-uttaro-idr-bgp-persistence-03.
Diffstat (limited to 'proto')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | proto/bgp/attrs.c | 55 |
| -rw-r--r-- | proto/bgp/bgp.c | 170 |
| -rw-r--r-- | proto/bgp/bgp.h | 32 |
| -rw-r--r-- | proto/bgp/config.Y | 13 |
| -rw-r--r-- | proto/bgp/packets.c | 75 |
| -rw-r--r-- | proto/pipe/pipe.c | 6 |

6 files changed, 329 insertions, 22 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 6580db57..dc267fdb 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1413,6 +1413,10 @@ bgp_import_control(struct proto *P, rte **new, struct linpool *pool UNUSED) /* Do not export outside of AS (or confederation) */ if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT)) return -1; + + /* Do not export LLGR_STALE routes to LLGR-ignorant peers */ + if (!p->conn->remote_caps->llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE)) + return -1; } return 0; @@ -1580,6 +1584,19 @@ rte_resolvable(rte *rt) return rt->attrs->dest == RTD_UNICAST; } +static inline int +rte_stale(rte *r) +{ + if (r->u.bgp.stale < 0) + { + /* If staleness is unknown, compute and cache it */ + eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE); + } + + return r->u.bgp.stale; +} + int bgp_rte_better(rte *new, rte *old) { @@ -1604,7 +1621,15 @@ bgp_rte_better(rte *new, rte *old) if (n < o) return 0; - /* Start with local preferences */ + /* LLGR draft - depreference stale routes */ + n = rte_stale(new); + o = rte_stale(old); + if (n > o) + return 0; + if (n < o) + return 1; + + /* Start with local preferences */ x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); n = x ? 
x->u.data : new_bgp->cf->default_local_pref; @@ -1725,6 +1750,10 @@ bgp_rte_mergable(rte *pri, rte *sec) if (!rte_resolvable(sec)) return 0; + /* LLGR draft - depreference stale routes */ + if (rte_stale(pri) != rte_stale(sec)) + return 0; + /* Start with local preferences */ x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); @@ -1926,6 +1955,27 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) return old_is_group_best; } +struct rte * +bgp_rte_modify_stale(struct rte *r, struct linpool *pool) +{ + eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + struct adata *ad = a ? a->u.ptr : NULL; + uint flags = a ? a->flags : BAF_PARTIAL; + + if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) + return NULL; + + if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) + return r; + + r = rte_cow_rta(r, pool); + bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags, + int_set_add(pool, ad, BGP_COMM_LLGR_STALE)); + r->u.bgp.stale = 1; + + return r; +} + /* * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3 @@ -2011,6 +2061,9 @@ bgp_get_route_info(rte *e, byte *buf) if (e->u.bgp.suppressed) buf += bsprintf(buf, "-"); + if (rte_stale(e)) + buf += bsprintf(buf, "s"); + if (e->attrs->hostentry) { if (!rte_resolvable(e)) diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index ced83c5c..6dea88c8 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -513,8 +513,8 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) p->route_refresh = peer->route_refresh; p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh; - /* Whether we may handle possible GR of peer (it has some AF GR-able) */ - p->gr_ready = 0; /* Updated later */ + /* Whether we may handle possible GR/LLGR of peer (it has some AF GR-able) */ + p->gr_ready = p->llgr_ready = 0; /* Updated later */ /* Whether peer is ready to handle our GR 
recovery */ int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART); @@ -547,8 +547,15 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) c->load_state = BFS_NONE; /* Channels where peer may do GR */ - c->gr_ready = active && local->gr_aware && rem->gr_able; + uint gr_ready = active && local->gr_aware && rem->gr_able; + uint llgr_ready = active && local->llgr_aware && rem->llgr_able; + + c->gr_ready = gr_ready || llgr_ready; p->gr_ready = p->gr_ready || c->gr_ready; + p->llgr_ready = p->llgr_ready || llgr_ready; + + /* Remember last LLGR stale time */ + c->stale_time = local->llgr_aware ? rem->llgr_time : 0; /* Channels not able to recover gracefully */ if (p->p.gr_recovery && (!active || !peer_gr_ready)) @@ -558,8 +565,14 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) if (p->p.gr_recovery && loc->gr_able && peer_gr_ready) c->c.gr_wait = 1; - /* Channels where peer is not able to recover gracefully */ - if (c->gr_active && ! (c->gr_ready && (rem->gr_af_flags & BGP_GRF_FORWARDING))) + /* Channels where regular graceful restart failed */ + if ((c->gr_active == BGP_GRS_ACTIVE) && + !(active && rem->gr_able && (rem->gr_af_flags & BGP_GRF_FORWARDING))) + bgp_graceful_restart_done(c); + + /* Channels where regular long-lived restart failed */ + if ((c->gr_active == BGP_GRS_LLGR) && + !(active && rem->llgr_able && (rem->gr_af_flags & BGP_LLGRF_FORWARDING))) bgp_graceful_restart_done(c); /* GR capability implies that neighbor will send End-of-RIB */ @@ -669,12 +682,25 @@ bgp_handle_graceful_restart(struct bgp_proto *p) if (c->gr_ready) { - if (c->gr_active) + p->gr_active_num++; + + switch (c->gr_active) + { + case BGP_GRS_NONE: + c->gr_active = BGP_GRS_ACTIVE; + rt_refresh_begin(c->c.table, &c->c); + break; + + case BGP_GRS_ACTIVE: rt_refresh_end(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c); + break; - c->gr_active = 1; - p->gr_active_num++; - rt_refresh_begin(c->c.table, &c->c); + case BGP_GRS_LLGR: + 
rt_refresh_begin(c->c.table, &c->c); + rt_modify_stale(c->c.table, &c->c); + break; + } } else { @@ -695,7 +721,7 @@ bgp_handle_graceful_restart(struct bgp_proto *p) ASSERT(p->gr_active_num > 0); proto_notify_state(&p->p, PS_START); - bgp_start_timer(p->gr_timer, p->conn->remote_caps->gr_time); + tm_start(p->gr_timer, p->conn->remote_caps->gr_time S); } /** @@ -720,6 +746,7 @@ bgp_graceful_restart_done(struct bgp_channel *c) if (!p->gr_active_num) BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); + tm_stop(c->stale_timer); rt_refresh_end(c->c.table, &c->c); } @@ -738,7 +765,46 @@ bgp_graceful_restart_timeout(timer *t) struct bgp_proto *p = t->data; BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout"); - bgp_stop(p, 0, NULL, 0); + + if (p->llgr_ready) + { + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) + { + /* Channel is not in GR and is already flushed */ + if (!c->gr_active) + continue; + + /* Channel is already in LLGR from past restart */ + if (c->gr_active == BGP_GRS_LLGR) + continue; + + /* Channel is in GR, but does not support LLGR -> stop GR */ + if (!c->stale_time) + { + bgp_graceful_restart_done(c); + continue; + } + + /* Channel is in GR, and supports LLGR -> start LLGR */ + c->gr_active = BGP_GRS_LLGR; + tm_start(c->stale_timer, c->stale_time S); + rt_modify_stale(c->c.table, &c->c); + } + } + else + bgp_stop(p, 0, NULL, 0); +} + +static void +bgp_long_lived_stale_timeout(timer *t) +{ + struct bgp_channel *c = t->data; + struct bgp_proto *p = (void *) c->c.proto; + + BGP_TRACE(D_EVENTS, "Long-lived stale timeout"); + + bgp_graceful_restart_done(c); } @@ -873,6 +939,12 @@ bgp_hold_timeout(timer *t) if (sk_rx_ready(conn->sk) > 0) bgp_start_timer(conn->hold_timer, 10); + else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready) + { + BGP_TRACE(D_EVENTS, "Hold timer expired"); + bgp_handle_graceful_restart(p); + bgp_conn_enter_idle_state(conn); + } else bgp_error(conn, 4, 0, NULL, 0); } @@ -1172,9 +1244,26 @@ bgp_bfd_notify(struct 
bfd_request *req) { BGP_TRACE(D_EVENTS, "BFD session down"); bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN); - if (ps == PS_UP) - bgp_update_startup_delay(p); - bgp_stop(p, 0, NULL, 0); + + if (p->cf->bfd == BGP_BFD_GRACEFUL) + { + /* Trigger graceful restart */ + if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready) + bgp_handle_graceful_restart(p); + + if (p->incoming_conn.state > BS_IDLE) + bgp_conn_enter_idle_state(&p->incoming_conn); + + if (p->outgoing_conn.state > BS_IDLE) + bgp_conn_enter_idle_state(&p->outgoing_conn); + } + else + { + /* Trigger session down */ + if (ps == PS_UP) + bgp_update_startup_delay(p); + bgp_stop(p, 0, NULL, 0); + } } } @@ -1447,6 +1536,7 @@ bgp_init(struct proto_config *CF) P->rte_better = bgp_rte_better; P->rte_mergable = bgp_rte_mergable; P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; + P->rte_modify = bgp_rte_modify_stale; p->cf = cf; p->local_as = cf->local_as; @@ -1503,6 +1593,8 @@ bgp_channel_start(struct channel *C) bgp_init_bucket_table(c); bgp_init_prefix_table(c); + c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0); + c->next_hop_addr = c->cf->next_hop_addr; c->link_addr = IPA_NONE; c->packets_to_send = 0; @@ -1634,6 +1726,10 @@ bgp_postconfig(struct proto_config *CF) if (cf->multihop < 0) cf->multihop = internal ? 64 : 0; + /* LLGR mode default based on GR mode */ + if (cf->llgr_mode < 0) + cf->llgr_mode = cf->gr_mode ? 
BGP_LLGR_AWARE : 0; + /* Link check for single-hop BGP by default */ if (cf->check_link < 0) cf->check_link = !cf->multihop; @@ -1676,6 +1772,9 @@ bgp_postconfig(struct proto_config *CF) if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip)) cf_error("Multihop BGP with BFD requires specified local address"); + if (!cf->gr_mode && cf->llgr_mode) + cf_error("Long-lived graceful restart requires basic graceful restart"); + struct bgp_channel_config *cc; WALK_LIST(cc, CF->channels) @@ -1706,10 +1805,16 @@ bgp_postconfig(struct proto_config *CF) if (!cc->gw_mode) cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT; - /* Default based on proto config */ + /* Defaults based on proto config */ if (cc->gr_able == 0xff) cc->gr_able = (cf->gr_mode == BGP_GR_ABLE); + if (cc->llgr_able == 0xff) + cc->llgr_able = (cf->llgr_mode == BGP_LLGR_ABLE); + + if (cc->llgr_time == ~0U) + cc->llgr_time = cf->llgr_time; + /* Default values of IGP tables */ if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp) { @@ -1885,6 +1990,7 @@ static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "Ope static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""}; static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"}; static char *bgp_auto_errors[] = { "", "Route limit exceeded"}; +static char *bgp_gr_states[] = { "None", "Regular", "Long-lived"}; static const char * bgp_last_errmsg(struct bgp_proto *p) @@ -1963,6 +2069,7 @@ bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps) uint any_gr_able = 0; uint any_add_path = 0; uint any_ext_next_hop = 0; + uint any_llgr_able = 0; u32 *afl1 = alloca(caps->af_count * sizeof(u32)); u32 *afl2 = alloca(caps->af_count * sizeof(u32)); uint afn1, afn2; @@ -1973,6 +2080,7 @@ bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps 
*caps) any_gr_able |= ac->gr_able; any_add_path |= ac->add_path; any_ext_next_hop |= ac->ext_next_hop; + any_llgr_able |= ac->llgr_able; } if (any_mp_bgp) @@ -2052,6 +2160,32 @@ bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps) if (caps->enhanced_refresh) cli_msg(-1006, " Enhanced refresh"); + + if (caps->llgr_aware) + cli_msg(-1006, " Long-lived graceful restart"); + + if (any_llgr_able) + { + u32 stale_time = 0; + + afn1 = afn2 = 0; + WALK_AF_CAPS(caps, ac) + { + stale_time = MAX(stale_time, ac->llgr_time); + + if (ac->llgr_able && ac->llgr_time) + afl1[afn1++] = ac->afi; + + if (ac->llgr_flags & BGP_GRF_FORWARDING) + afl2[afn2++] = ac->afi; + } + + /* Continues from llgr_aware */ + cli_msg(-1006, " LL stale time: %u", stale_time); + + bgp_show_afis(-1006, " AF supported:", afl1, afn1); + bgp_show_afis(-1006, " AF preserved:", afl2, afn2); + } } static void @@ -2118,6 +2252,12 @@ bgp_show_proto_info(struct proto *P) { channel_show_info(&c->c); + if (p->gr_active_num) + cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]); + + if (tm_active(c->stale_timer)) + cli_msg(-1006, " LL stale timer: %t/-", tm_remains(c->stale_timer)); + if (c->c.channel_state == CS_UP) { if (ipa_zero(c->link_addr)) diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 1235ee78..6f0a5587 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -107,9 +107,11 @@ struct bgp_config { int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */ int allow_local_pref; /* Allow LOCAL_PREF in EBGP sessions */ int gr_mode; /* Graceful restart mode (BGP_GR_*) */ + int llgr_mode; /* Long-lived graceful restart mode (BGP_LLGR_*) */ int setkey; /* Set MD5 password to system SA/SP database */ /* Times below are in seconds */ unsigned gr_time; /* Graceful restart timeout */ + unsigned llgr_time; /* Long-lived graceful restart stale time */ unsigned connect_delay_time; /* Minimum delay between connect attempts */ unsigned connect_retry_time; /* Timeout for 
connect attempts */ unsigned hold_time, initial_hold_time; @@ -138,6 +140,8 @@ struct bgp_channel_config { u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */ u8 gr_able; /* Allow full graceful restart for the channel */ + u8 llgr_able; /* Allow full long-lived GR for the channel */ + uint llgr_time; /* Long-lived graceful restart stale time */ u8 ext_next_hop; /* Allow both IPv4 and IPv6 next hops */ u8 add_path; /* Use ADD-PATH extension [RFC 7911] */ @@ -166,12 +170,26 @@ struct bgp_channel_config { /* For GR capability per-AF flags */ #define BGP_GRF_FORWARDING 0x80 +#define BGP_LLGR_ABLE 1 +#define BGP_LLGR_AWARE 2 + +#define BGP_LLGRF_FORWARDING 0x80 + +#define BGP_GRS_NONE 0 /* No GR */ +#define BGP_GRS_ACTIVE 1 /* Graceful restart per RFC 4724 */ +#define BGP_GRS_LLGR 2 /* Long-lived GR phase (stale timer active) */ + +#define BGP_BFD_GRACEFUL 2 /* BFD down triggers graceful restart */ + struct bgp_af_caps { u32 afi; u8 ready; /* Multiprotocol capability, RFC 4760 */ u8 gr_able; /* Graceful restart support, RFC 4724 */ u8 gr_af_flags; /* Graceful restart per-AF flags */ + u8 llgr_able; /* Long-lived GR, RFC draft */ + u32 llgr_time; /* Long-lived GR stale time */ + u8 llgr_flags; /* Long-lived GR per-AF flags */ u8 ext_next_hop; /* Extended IPv6 next hop, RFC 5549 */ u8 add_path; /* Multiple paths support, RFC 7911 */ }; @@ -188,6 +206,8 @@ struct bgp_caps { u8 gr_flags; /* Graceful restart flags */ u16 gr_time; /* Graceful restart time in seconds */ + u8 llgr_aware; /* Long-lived GR capability, RFC draft */ + u16 af_count; /* Number of af_data items */ struct bgp_af_caps af_data[0]; /* Per-AF capability data */ @@ -243,6 +263,7 @@ struct bgp_proto { u8 route_refresh; /* Route refresh allowed to send [RFC 2918] */ u8 enhanced_refresh; /* Enhanced refresh is negotiated [RFC 7313] */ u8 gr_ready; /* Neighbor could do graceful restart */ + u8 llgr_ready; /* Neighbor 
could do Long-lived GR, implies gr_ready */ u8 gr_active_num; /* Neighbor is doing GR, number of active channels */ u8 channel_count; /* Number of active channels */ u32 *afi_map; /* Map channel index -> AFI */ @@ -291,10 +312,13 @@ struct bgp_channel { u32 packets_to_send; /* Bitmap of packet types to be sent */ + u8 ext_next_hop; /* Session allows both IPv4 and IPv6 next hops */ + u8 gr_ready; /* Neighbor could do GR on this AF */ - u8 gr_active; /* Neighbor is doing GR and keeping fwd state */ + u8 gr_active; /* Neighbor is doing GR (BGP_GRS_*) */ - u8 ext_next_hop; /* Session allows both IPv4 and IPv6 next hops */ + timer *stale_timer; /* Long-lived stale timer for LLGR */ + u32 stale_time; /* Stored LLGR stale time from last session */ u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ @@ -505,6 +529,7 @@ void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); +struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool); void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old); int bgp_import_control(struct proto *, struct rte **, struct linpool *); int bgp_get_attr(struct eattr *e, byte *buf, int buflen); @@ -645,6 +670,9 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); #define BGP_COMM_NO_ADVERTISE 0xffffff02 /* Don't export at all */ #define BGP_COMM_NO_EXPORT_SUBCONFED 0xffffff03 /* NO_EXPORT even in local confederation */ +#define BGP_COMM_LLGR_STALE 0xffff0006 /* Route is stale according to LLGR */ +#define BGP_COMM_NO_LLGR 0xffff0007 /* Do not treat the route according to LLGR */ + /* Origins */ #define ORIGIN_IGP 0 diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 7583f7d1..120b1e88 100644 --- 
a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -28,7 +28,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, - STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6) + STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG, + LIVED, STALE) %type <i32> bgp_afi @@ -63,6 +64,8 @@ bgp_proto_start: proto_start BGP { BGP_CFG->default_local_pref = 100; BGP_CFG->gr_mode = BGP_GR_AWARE; BGP_CFG->gr_time = 120; + BGP_CFG->llgr_mode = -1; + BGP_CFG->llgr_time = 3600; BGP_CFG->setkey = 1; BGP_CFG->check_link = -1; } @@ -161,9 +164,13 @@ bgp_proto: | bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; } | bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; } | bgp_proto GRACEFUL RESTART TIME expr ';' { BGP_CFG->gr_time = $5; } + | bgp_proto LONG LIVED GRACEFUL RESTART bool ';' { BGP_CFG->llgr_mode = $6; } + | bgp_proto LONG LIVED GRACEFUL RESTART AWARE ';' { BGP_CFG->llgr_mode = BGP_LLGR_AWARE; } + | bgp_proto LONG LIVED STALE TIME expr ';' { BGP_CFG->llgr_time = $6; } | bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; } | bgp_proto CHECK LINK bool ';' { BGP_CFG->check_link = $4; } | bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); } + | bgp_proto BFD GRACEFUL ';' { BGP_CFG->bfd = BGP_BFD_GRACEFUL; cf_check_bfd(1); } ; bgp_afi: @@ -199,6 +206,8 @@ bgp_channel_start: bgp_afi BGP_CC->afi = $1; BGP_CC->desc = desc; BGP_CC->gr_able = 0xff; /* undefined */ + BGP_CC->llgr_able = 0xff; /* undefined */ + BGP_CC->llgr_time = ~0U; /* undefined */ } }; @@ -214,6 +223,8 @@ bgp_channel_item: | GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; } | SECONDARY bool { BGP_CC->secondary = $2; } | GRACEFUL RESTART bool { BGP_CC->gr_able = $3; } + | LONG LIVED GRACEFUL RESTART bool { 
BGP_CC->llgr_able = $5; } + | LONG LIVED STALE TIME expr { BGP_CC->llgr_time = $5; } | EXTENDED NEXT HOP bool { BGP_CC->ext_next_hop = $4; } | ADD PATHS RX { BGP_CC->add_path = BGP_ADD_PATH_RX; } | ADD PATHS TX { BGP_CC->add_path = BGP_ADD_PATH_TX; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index c40b8ec2..ed1db04b 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -260,6 +260,9 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0; } + if (p->cf->llgr_mode) + caps->llgr_aware = 1; + /* Allocate and fill per-AF fields */ WALK_LIST(c, p->p.channels) { @@ -280,6 +283,15 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) if (p->p.gr_recovery) ac->gr_af_flags |= BGP_GRF_FORWARDING; } + + if (c->cf->llgr_able) + { + ac->llgr_able = 1; + ac->llgr_time = c->cf->llgr_time; + + if (p->p.gr_recovery) + ac->llgr_flags |= BGP_LLGRF_FORWARDING; + } } /* Sort capability fields by AFI/SAFI */ @@ -289,9 +301,9 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) /* Create capability list in buffer */ /* - * Note that max length is ~ 20+14*af_count. With max 12 channels that is - * 188. Option limit is 253 and buffer size is 4096, so we cannot overflow - * unless we add new capabilities or more AFs. + * Note that max length is ~ 22+21*af_count. With max 12 channels that is + * 274. Option limit is 253 and buffer size is 4096, so we cannot overflow + * unless we add new capabilities or more AFs. 
XXXXX */ WALK_AF_CAPS(caps, ac) @@ -384,6 +396,24 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) *buf++ = 0; /* Capability data length */ } + if (caps->llgr_aware) + { + *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */ + *buf++ = 0; /* Capability data length, will be fixed later */ + data = buf; + + WALK_AF_CAPS(caps, ac) + if (ac->llgr_able) + { + put_af3(buf, ac->afi); + buf[3] = ac->llgr_flags; + put_u24(buf+4, ac->llgr_time); + buf += 7; + } + + data[-1] = buf - data; + } + return buf; } @@ -508,11 +538,49 @@ bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, i caps->enhanced_refresh = 1; break; + case 71: /* Long lived graceful restart capability, RFC draft */ + if (cl % 7) + goto err; + + /* Presumably, only the last instance is valid */ + WALK_AF_CAPS(caps, ac) + { + ac->llgr_able = 0; + ac->llgr_flags = 0; + ac->llgr_time = 0; + } + + caps->llgr_aware = 1; + + for (i = 0; i < cl; i += 7) + { + af = get_af3(pos+2+i); + ac = bgp_get_af_caps(caps, af); + ac->llgr_able = 1; + ac->llgr_flags = pos[2+i+3]; + ac->llgr_time = get_u24(pos + 2+i+4); + } + break; + /* We can safely ignore all other capabilities */ } ADVANCE(pos, len, 2 + cl); } + + /* The LLGR capability must be advertised together with the GR capability, + otherwise it must be disregarded */ + if (!caps->gr_aware && caps->llgr_aware) + { + caps->llgr_aware = 0; + WALK_AF_CAPS(caps, ac) + { + ac->llgr_able = 0; + ac->llgr_flags = 0; + ac->llgr_time = 0; + } + } + return; err: @@ -1131,6 +1199,7 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) e->pflags = 0; e->u.bgp.suppressed = 0; + e->u.bgp.stale = -1; rte_update2(&s->channel->c, n, e, s->last_src); } diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 7aada37e..82ccf38a 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -78,6 +78,12 @@ pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *o e->pref = new->pref; 
e->pflags = new->pflags; +#ifdef CONFIG_BGP + /* Hack to cleanup cached value */ + if (e->attrs->src->proto->proto == &proto_bgp) + e->u.bgp.stale = -1; +#endif + src = a->src; } else |