From 9c8b20a2ef4b29dd94fcd35e7e16541dabef2330 Mon Sep 17 00:00:00 2001 From: Patrick Hemmer Date: Sun, 13 May 2018 02:33:33 -0400 Subject: fix graceful-restart when not all peers are restarting This fixes an issue where GoBGP would incorrectly defer sending routes to a peer on a graceful restart. RFC4724 states: > Once the session between the Restarting Speaker and the Receiving > Speaker is re-established, the Restarting Speaker will receive and > process BGP messages from its peers. However, it MUST defer route > selection for an address family until it either (a) receives the > End-of-RIB marker from all its peers (excluding the ones with the > "Restart State" bit set in the received capability and excluding the > ones that do not advertise the graceful restart capability) or (b) > the Selection_Deferral_Timer referred to below has expired. The scenario that this fixes is where you have 3 (or more) peers, and 2 of the peers ("A" and "B") perform a graceful restart at the same time, but the 3rd ("C") does not. If after restart peer C sends EOR to peer A before peer B reaches BGP_FSM_ESTABLISHED with peer A, it defers the route selection. However once peer B does reach BGP_FSM_ESTABLISHED, peer A still wouldn't send any updates to peer C until the deferral expired. This commit changes the behavior so that upon restart, once the peer receives EOR from all non-restarting peers and reaches BGP_FSM_ESTABLISHED for all restarting peers, the routes are sent to all peers. --- server/server.go | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'server/server.go') diff --git a/server/server.go b/server/server.go index df0306b4..87348a72 100644 --- a/server/server.go +++ b/server/server.go @@ -1156,16 +1156,42 @@ func (server *BgpServer) handleFSMMessage(peer *Peer, e *FsmMsg) { } else { // RFC 4724 4.1 // Once the session between the Restarting Speaker and the Receiving - // Speaker is re-established, the Restarting Speaker will receive and - // process BGP messages from its peers. However, it MUST defer route - // selection for an address family until it either (a) ...snip... - // or (b) the Selection_Deferral_Timer referred to below has expired. - deferral := peer.fsm.pConf.GracefulRestart.Config.DeferralTime - log.WithFields(log.Fields{ - "Topic": "Peer", - "Key": peer.ID(), - }).Debugf("Now syncing, suppress sending updates. start deferral timer(%d)", deferral) - time.AfterFunc(time.Second*time.Duration(deferral), deferralExpiredFunc(bgp.RouteFamily(0))) + // Speaker is re-established, ...snip... it MUST defer route + // selection for an address family until it either (a) receives the + // End-of-RIB marker from all its peers (excluding the ones with the + // "Restart State" bit set in the received capability and excluding the + // ones that do not advertise the graceful restart capability) or (b) + // the Selection_Deferral_Timer referred to below has expired. + allEnd := func() bool { + for _, p := range server.neighborMap { + if !p.recvedAllEOR() { + return false + } + } + return true + }() + if allEnd { + for _, p := range server.neighborMap { + p.fsm.pConf.GracefulRestart.State.LocalRestarting = false + if !p.isGracefulRestartEnabled() { + continue + } + paths, _ := server.getBestFromLocal(p, p.configuredRFlist()) + if len(paths) > 0 { + sendFsmOutgoingMsg(p, paths, nil, false) + } + } + log.WithFields(log.Fields{ + "Topic": "Server", + }).Info("sync finished") + } else { + deferral := peer.fsm.pConf.GracefulRestart.Config.DeferralTime + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": peer.ID(), + }).Debugf("Now syncing, suppress sending updates. start deferral timer(%d)", deferral) + time.AfterFunc(time.Second*time.Duration(deferral), deferralExpiredFunc(bgp.RouteFamily(0))) + } } } else { if server.shutdown && nextState == bgp.BGP_FSM_IDLE { -- cgit v1.2.3