diff options
| | | |
|---|---|---|
| author | Ondrej Zajicek (work) <santiago@crfreenet.org> | 2018-06-04 18:56:05 +0200 |
| committer | Ondrej Zajicek (work) <santiago@crfreenet.org> | 2018-07-17 13:16:18 +0200 |
| commit | 68197386ddba616c9973d3190c42f6121a25d9b7 (patch) | |
| tree | f328462a31a041a8c87f11c76e06503a515943c5 /proto | |
| parent | 470efcb98cb33de2d5636679eb0f72c88280d6b8 (diff) | |
BGP: Long-lived graceful restart
The patch implements long-lived graceful restart for BGP, namely
draft-uttaro-idr-bgp-persistence-03.
Diffstat (limited to 'proto')
-rw-r--r-- | proto/bgp/attrs.c | 53 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 75 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 23 | ||||
-rw-r--r-- | proto/bgp/config.Y | 8 | ||||
-rw-r--r-- | proto/bgp/packets.c | 62 | ||||
-rw-r--r-- | proto/pipe/pipe.c | 6 |
6 files changed, 212 insertions, 15 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 73eb4040..3b88791d 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1173,6 +1173,9 @@ bgp_community_filter(struct bgp_proto *p, rte *e) DBG("\tNO_EXPORT\n"); return 1; } + + if (!p->conn->peer_llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE)) + return 1; } return 0; @@ -1233,6 +1236,19 @@ rte_resolvable(rte *rt) return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH); } +static inline int +rte_stale(rte *r) +{ + if (r->u.bgp.stale < 0) + { + /* If staleness is unknown, compute and cache it */ + eattr *a = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY)); + r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE); + } + + return r->u.bgp.stale; +} + int bgp_rte_better(rte *new, rte *old) { @@ -1257,6 +1273,14 @@ bgp_rte_better(rte *new, rte *old) if (n < o) return 0; + /* LLGR draft - depreference stale routes */ + n = rte_stale(new); + o = rte_stale(old); + if (n > o) + return 0; + if (n < o) + return 1; + /* Start with local preferences */ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); @@ -1378,6 +1402,10 @@ bgp_rte_mergable(rte *pri, rte *sec) if (!rte_resolvable(sec)) return 0; + /* LLGR draft - depreference stale routes */ + if (rte_stale(pri) != rte_stale(sec)) + return 0; + /* Start with local preferences */ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); @@ -1580,6 +1608,27 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) return old_is_group_best; } +struct rte * +bgp_rte_modify_stale(struct rte *r, struct linpool *pool) +{ + eattr *a = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY)); + struct adata *ad = a ? 
a->u.ptr : NULL; + + if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) + return NULL; + + if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) + return r; + + r = rte_cow_rta(r, pool); + bgp_attach_attr(&(r->attrs->eattrs), pool, BA_COMMUNITY, + (uintptr_t) int_set_add(pool, ad, BGP_COMM_LLGR_STALE)); + r->u.bgp.stale = 1; + + return r; +} + + static struct adata * bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool) { @@ -1589,7 +1638,6 @@ bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool) return newa; } - /* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format * and append path old4 (in 4B format). */ @@ -1985,6 +2033,9 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs) if (e->u.bgp.suppressed) buf += bsprintf(buf, "-"); + if (rte_stale(e)) + buf += bsprintf(buf, "s"); + if (e->attrs->hostentry) { if (!rte_resolvable(e)) diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index b99672f5..0932051a 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -394,10 +394,17 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready) p->p.gr_wait = 1; - if (p->gr_active) + if (p->gr_active == BGP_GRS_ACTIVE) tm_stop(p->gr_timer); - if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING))) + /* Check F-bit for regular graceful restart */ + if ((p->gr_active == BGP_GRS_ACTIVE) && + (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING))) + bgp_graceful_restart_done(p); + + /* Check F-bit for long-lived graceful restart */ + if (((p->gr_active == BGP_GRS_LLGR_1) || (p->gr_active == BGP_GRS_LLGR_2)) && + (!conn->peer_llgr_able || !(conn->peer_llgr_aflags & BGP_LLGRF_FORWARDING))) bgp_graceful_restart_done(p); /* GR capability implies that neighbor will send End-of-RIB */ @@ -474,11 +481,25 @@ bgp_handle_graceful_restart(struct bgp_proto *p) p->gr_active ? 
" - already pending" : ""); proto_notify_state(&p->p, PS_START); - if (p->gr_active) + switch (p->gr_active) + { + case BGP_GRS_ACTIVE: rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); + break; + + case BGP_GRS_LLGR_1: + rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook); + return; + + case BGP_GRS_LLGR_2: + rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook); + rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook); + return; + } - p->gr_active = 1; - bgp_start_timer(p->gr_timer, p->conn->peer_gr_time); + p->stale_time = p->cf->llgr_mode ? p->conn->peer_llgr_time : 0; + p->gr_active = !p->stale_time ? BGP_GRS_ACTIVE : BGP_GRS_LLGR_1; + tm_start(p->gr_timer, p->conn->peer_gr_time); rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook); } @@ -515,10 +536,27 @@ bgp_graceful_restart_timeout(timer *t) { struct bgp_proto *p = t->data; - BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout"); - bgp_stop(p, 0, NULL, 0); -} + switch (p->gr_active) + { + case BGP_GRS_ACTIVE: + BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout"); + bgp_stop(p, 0, NULL, 0); + return; + + case BGP_GRS_LLGR_1: + BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout"); + p->gr_active = BGP_GRS_LLGR_2; + tm_start(p->gr_timer, p->stale_time); + rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook); + return; + case BGP_GRS_LLGR_2: + BGP_TRACE(D_EVENTS, "Long-lived graceful restart timeout"); + p->gr_active = 0; + rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); + return; + } +} /** * bgp_refresh_begin - start incoming enhanced route refresh sequence @@ -576,6 +614,10 @@ bgp_send_open(struct bgp_conn *conn) conn->peer_gr_time = 0; conn->peer_gr_flags = 0; conn->peer_gr_aflags = 0; + conn->peer_llgr_aware = 0; + conn->peer_llgr_able = 0; + conn->peer_llgr_time = 0; + conn->peer_llgr_aflags = 0; conn->peer_ext_messages_support = 0; DBG("BGP: Sending open\n"); @@ -1297,6 +1339,7 @@ bgp_init(struct proto_config *C) P->rte_better = bgp_rte_better; 
P->rte_mergable = bgp_rte_mergable; P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL; + P->rte_modify = bgp_rte_modify_stale; p->cf = c; p->local_as = c->local_as; @@ -1332,6 +1375,10 @@ bgp_check_config(struct bgp_config *c) if (!c->missing_lladdr) c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF; + /* LLGR mode default based on GR mode */ + if (c->llgr_mode < 0) + c->llgr_mode = c->gr_mode ? BGP_LLGR_AWARE : 0; + /* Disable after error incompatible with restart limit action */ if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error) c->c.in_limit->action = PLA_DISABLE; @@ -1382,6 +1429,9 @@ bgp_check_config(struct bgp_config *c) if (c->secondary && !c->c.table->sorted) cf_error("BGP with secondary option requires sorted table"); + + if (!c->gr_mode && c->llgr_mode) + cf_error("Long-lived graceful restart requires basic graceful restart"); } static int @@ -1550,6 +1600,11 @@ bgp_show_proto_info(struct proto *P) if (p->gr_active) cli_msg(-1006, " Neighbor graceful restart active"); + if (p->gr_active && p->gr_timer->expires) + cli_msg(-1006, " %-15s %d/-", + (p->gr_active != BGP_GRS_LLGR_2) ? "Restart timer:" : "LL stale timer:", + p->gr_timer->expires - now); + if (P->proto_state == PS_START) { struct bgp_conn *oc = &p->outgoing_conn; @@ -1563,9 +1618,6 @@ bgp_show_proto_info(struct proto *P) (oc->connect_retry_timer->expires)) cli_msg(-1006, " Connect delay: %d/%d", oc->connect_retry_timer->expires - now, p->cf->connect_delay_time); - - if (p->gr_active && p->gr_timer->expires) - cli_msg(-1006, " Restart timer: %d/-", p->gr_timer->expires - now); } else if (P->proto_state == PS_UP) { @@ -1574,6 +1626,7 @@ bgp_show_proto_info(struct proto *P) c->peer_refresh_support ? " refresh" : "", c->peer_enhanced_refresh_support ? " enhanced-refresh" : "", c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""), + c->peer_llgr_able ? " llgr-able" : (c->peer_llgr_aware ? 
" llgr-aware" : ""), c->peer_as4_support ? " AS4" : "", (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "", (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "", diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index b3db8b7e..53194fb6 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -52,8 +52,10 @@ struct bgp_config { int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */ int allow_local_pref; /* Allow LOCAL_PREF in EBGP sessions */ int gr_mode; /* Graceful restart mode (BGP_GR_*) */ + int llgr_mode; /* Long-lived graceful restart mode (BGP_LLGR_*) */ int setkey; /* Set MD5 password to system SA/SP database */ unsigned gr_time; /* Graceful restart timeout */ + unsigned llgr_time; /* Long-lived graceful restart timeout */ unsigned connect_delay_time; /* Minimum delay between connect attempts */ unsigned connect_retry_time; /* Timeout for connect attempts */ unsigned hold_time, initial_hold_time; @@ -90,6 +92,16 @@ struct bgp_config { /* For peer_gr_aflags */ #define BGP_GRF_FORWARDING 0x80 +#define BGP_LLGR_ABLE 1 +#define BGP_LLGR_AWARE 2 + +#define BGP_LLGRF_FORWARDING 0x80 + +#define BGP_GRS_NONE 0 /* No GR */ +#define BGP_GRS_ACTIVE 1 /* Graceful restart per RFC 4724 */ +#define BGP_GRS_LLGR_1 2 /* Long-lived GR phase 1 (restart time) */ +#define BGP_GRS_LLGR_2 3 /* Long-lived GR phase 2 (stale time) */ + struct bgp_conn { struct bgp_proto *bgp; @@ -113,6 +125,10 @@ struct bgp_conn { u16 peer_gr_time; u8 peer_gr_flags; u8 peer_gr_aflags; + u8 peer_llgr_aware; + u8 peer_llgr_able; + u16 peer_llgr_time; + u8 peer_llgr_aflags; u8 peer_ext_messages_support; /* Peer supports extended message length [draft] */ unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; @@ -133,9 +149,10 @@ struct bgp_proto { int rr_client; /* Whether neighbor is RR client of me */ int rs_client; /* Whether neighbor is RS client of me */ u8 gr_ready; /* Neighbor could do graceful restart */ - u8 gr_active; /* 
Neighbor is doing graceful restart */ + u8 gr_active; /* Neighbor is doing graceful restart (BGP_GRS_*) */ u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */ u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */ + uint stale_time; /* Long-lived stale time for LLGR */ struct bgp_conn *conn; /* Connection we have established */ struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */ struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ @@ -252,6 +269,7 @@ int bgp_get_attr(struct eattr *e, byte *buf, int buflen); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); +struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool); void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs); int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *); void bgp_init_bucket_table(struct bgp_proto *); @@ -398,6 +416,9 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define BGP_COMM_NO_ADVERTISE 0xffffff02 /* Don't export at all */ #define BGP_COMM_NO_EXPORT_SUBCONFED 0xffffff03 /* NO_EXPORT even in local confederation */ +#define BGP_COMM_LLGR_STALE 0xffff0006 /* Route is stale according to LLGR */ +#define BGP_COMM_NO_LLGR 0xffff0007 /* Do not treat the route according to LLGR */ + /* Origins */ #define ORIGIN_IGP 0 diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 075403a3..2ed1944d 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -27,7 +27,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, - CHECK, 
LINK, PORT, EXTENDED, MESSAGES, SETKEY, BGP_LARGE_COMMUNITY) + CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, BGP_LARGE_COMMUNITY, + LONG, LIVED, STALE) CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER, CONFIGURATION, CHANGE, DECONFIGURED, CONNECTION, REJECTED, COLLISION, @@ -60,6 +61,8 @@ bgp_proto_start: proto_start BGP { BGP_CFG->default_local_pref = 100; BGP_CFG->gr_mode = BGP_GR_AWARE; BGP_CFG->gr_time = 120; + BGP_CFG->llgr_mode = -1; + BGP_CFG->llgr_time = 3600; BGP_CFG->setkey = 1; } ; @@ -162,6 +165,9 @@ bgp_proto: | bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; } | bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; } | bgp_proto GRACEFUL RESTART TIME expr ';' { BGP_CFG->gr_time = $5; } + | bgp_proto LONG LIVED GRACEFUL RESTART bool ';' { BGP_CFG->llgr_mode = $6; } + | bgp_proto LONG LIVED GRACEFUL RESTART AWARE ';' { BGP_CFG->llgr_mode = BGP_LLGR_AWARE; } + | bgp_proto LONG LIVED STALE TIME expr ';' { BGP_CFG->llgr_time = $6; } | bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; } | bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; } | bgp_proto CHECK LINK bool ';' { BGP_CFG->check_link = $4; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index f0049d3a..0d1a3414 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -231,6 +231,32 @@ bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf) return buf; } +static byte * +bgp_put_cap_llgr1(struct bgp_proto *p, byte *buf) +{ + *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */ + *buf++ = 7; /* Capability data length */ + + *buf++ = 0; /* Appropriate AF */ + *buf++ = BGP_AF; + *buf++ = 1; /* and SAFI 1 */ + + /* Next is 8bit flags and 24bit time */ + put_u32(buf, p->cf->llgr_time); + buf[0] = p->p.gr_recovery ? 
BGP_LLGRF_FORWARDING : 0; + buf += 4; + + return buf; +} + +static byte * +bgp_put_cap_llgr2(struct bgp_proto *p UNUSED, byte *buf) +{ + *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */ + *buf++ = 0; /* Capability data length */ + return buf; +} + static byte * bgp_create_open(struct bgp_conn *conn, byte *buf) @@ -285,6 +311,11 @@ bgp_create_open(struct bgp_conn *conn, byte *buf) if (p->cf->enable_extended_messages) cap = bgp_put_cap_ext_msg(p, cap); + if (p->cf->llgr_mode == BGP_LLGR_ABLE) + cap = bgp_put_cap_llgr1(p, cap); + else if (p->cf->llgr_mode == BGP_LLGR_AWARE) + cap = bgp_put_cap_llgr2(p, cap); + cap_len = cap - buf - 12; if (cap_len > 0) { @@ -872,11 +903,38 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) conn->peer_enhanced_refresh_support = 1; break; + case 71: /* Long-lived graceful restart capability, RFC draft */ + if (cl % 7) + goto err; + conn->peer_llgr_aware = 1; + conn->peer_llgr_able = 0; + conn->peer_llgr_time = 0; + conn->peer_llgr_aflags = 0; + for (i = 0; i < cl; i += 4) + if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */ + { + conn->peer_llgr_able = 1; + conn->peer_llgr_time = get_u32(opt + 2+i+3) & 0xffffff; + conn->peer_llgr_aflags = opt[2+i+3]; + } + break; + /* We can safely ignore all other capabilities */ } len -= 2 + cl; opt += 2 + cl; } + + /* The LLGR capability must be advertised together with the GR capability, + otherwise it must be disregarded */ + if (!conn->peer_gr_aware && conn->peer_llgr_aware) + { + conn->peer_llgr_aware = 0; + conn->peer_llgr_able = 0; + conn->peer_llgr_time = 0; + conn->peer_llgr_aflags = 0; + } + return; err: @@ -1034,7 +1092,8 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) p->as4_session = p->cf->enable_as4 && conn->peer_as4_support; p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX); p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX); 
- p->gr_ready = p->cf->gr_mode && conn->peer_gr_able; + p->gr_ready = (p->cf->gr_mode && conn->peer_gr_able) || + (p->cf->llgr_mode && conn->peer_llgr_able); p->ext_messages = p->cf->enable_extended_messages && conn->peer_ext_messages_support; /* Update RA mode */ @@ -1125,6 +1184,7 @@ bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen, e->net = n; e->pflags = 0; e->u.bgp.suppressed = 0; + e->u.bgp.stale = -1; rte_update2(p->p.main_ahook, n, e, *src); } diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 5d0e3c76..164191dd 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -89,6 +89,12 @@ pipe_rt_notify(struct proto *P, rtable *src_table, net *n, rte *new, rte *old, e memcpy(&(e->u), &(new->u), sizeof(e->u)); e->pref = new->pref; e->pflags = new->pflags; + +#ifdef CONFIG_BGP + /* Hack to cleanup cached value */ + if (e->attrs->src->proto->proto == &proto_bgp) + e->u.bgp.stale = -1; +#endif } src = a.src; |