From 023a498e4a90982095c870c5491770bf4952fc2b Mon Sep 17 00:00:00 2001 From: ISHIDA Wataru Date: Wed, 10 Feb 2016 23:41:44 -0800 Subject: server: support graceful-restart helper-speaker behavior Signed-off-by: ISHIDA Wataru --- server/fsm.go | 112 ++++++++++++++++++++++++++++++++++++++++--------------- server/peer.go | 38 +++++++++++++++++++ server/server.go | 78 ++++++++++++++++++++++++-------------- 3 files changed, 168 insertions(+), 60 deletions(-) (limited to 'server') diff --git a/server/fsm.go b/server/fsm.go index aa0d7d4e..8a8748a5 100644 --- a/server/fsm.go +++ b/server/fsm.go @@ -116,24 +116,25 @@ func (s AdminState) String() string { } type FSM struct { - t tomb.Tomb - gConf *config.Global - pConf *config.Neighbor - state bgp.FSMState - reason FsmStateReason - conn net.Conn - connCh chan net.Conn - idleHoldTime float64 - opensentHoldTime float64 - adminState AdminState - adminStateCh chan AdminState - getActiveCh chan struct{} - h *FSMHandler - rfMap map[bgp.RouteFamily]bool - capMap map[bgp.BGPCapabilityCode][]bgp.ParameterCapabilityInterface - recvOpen *bgp.BGPMessage - peerInfo *table.PeerInfo - policy *table.RoutingPolicy + t tomb.Tomb + gConf *config.Global + pConf *config.Neighbor + state bgp.FSMState + reason FsmStateReason + conn net.Conn + connCh chan net.Conn + idleHoldTime float64 + opensentHoldTime float64 + adminState AdminState + adminStateCh chan AdminState + getActiveCh chan struct{} + h *FSMHandler + rfMap map[bgp.RouteFamily]bool + capMap map[bgp.BGPCapabilityCode][]bgp.ParameterCapabilityInterface + recvOpen *bgp.BGPMessage + peerInfo *table.PeerInfo + policy *table.RoutingPolicy + gracefulRestartTimer *time.Timer } func (fsm *FSM) bgpMessageStateUpdate(MessageType uint8, isIn bool) { @@ -191,19 +192,21 @@ func NewFSM(gConf *config.Global, pConf *config.Neighbor, policy *table.RoutingP adminState = ADMIN_STATE_DOWN } fsm := &FSM{ - gConf: gConf, - pConf: pConf, - state: bgp.BGP_FSM_IDLE, - connCh: make(chan net.Conn, 1), - opensentHoldTime: float64(HOLDTIME_OPENSENT), - adminState: adminState, - adminStateCh: make(chan AdminState, 1), - getActiveCh: make(chan struct{}), - rfMap: make(map[bgp.RouteFamily]bool), - capMap: make(map[bgp.BGPCapabilityCode][]bgp.ParameterCapabilityInterface), - peerInfo: table.NewPeerInfo(gConf, pConf), - policy: policy, + gConf: gConf, + pConf: pConf, + state: bgp.BGP_FSM_IDLE, + connCh: make(chan net.Conn, 1), + opensentHoldTime: float64(HOLDTIME_OPENSENT), + adminState: adminState, + adminStateCh: make(chan AdminState, 1), + getActiveCh: make(chan struct{}), + rfMap: make(map[bgp.RouteFamily]bool), + capMap: make(map[bgp.BGPCapabilityCode][]bgp.ParameterCapabilityInterface), + peerInfo: table.NewPeerInfo(gConf, pConf), + policy: policy, + gracefulRestartTimer: time.NewTimer(time.Hour), } + fsm.gracefulRestartTimer.Stop() fsm.t.Go(fsm.connectLoop) return fsm } @@ -285,7 +288,7 @@ func (fsm *FSM) connectLoop() error { ticker.Stop() connect := func() { - if fsm.state == bgp.BGP_FSM_ACTIVE { + if fsm.state == bgp.BGP_FSM_ACTIVE && !fsm.pConf.GracefulRestart.State.PeerRestarting { addr := fsm.pConf.Config.NeighborAddress host := net.JoinHostPort(addr, strconv.Itoa(bgp.BGP_PORT)) // check if LocalAddress has been configured @@ -376,6 +379,15 @@ func (h *FSMHandler) idle() (bgp.FSMState, FsmStateReason) { select { case <-h.t.Dying(): return -1, FSM_DYING + case <-fsm.gracefulRestartTimer.C: + if fsm.pConf.GracefulRestart.State.PeerRestarting { + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": fsm.pConf.Config.NeighborAddress, + "State": fsm.state, + }).Warn("graceful restart timer expired") + return bgp.BGP_FSM_IDLE, FSM_RESTART_TIMER_EXPIRED + } case conn, ok := <-fsm.connCh: if !ok { break @@ -440,6 +452,15 @@ func (h *FSMHandler) active() (bgp.FSMState, FsmStateReason) { // we don't implement delayed open timer so move to opensent right // away. return bgp.BGP_FSM_OPENSENT, 0 + case <-fsm.gracefulRestartTimer.C: + if fsm.pConf.GracefulRestart.State.PeerRestarting { + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": fsm.pConf.Config.NeighborAddress, + "State": fsm.state, + }).Warn("graceful restart timer expired") + return bgp.BGP_FSM_IDLE, FSM_RESTART_TIMER_EXPIRED + } case err := <-h.errorCh: return bgp.BGP_FSM_IDLE, err case s := <-fsm.adminStateCh: @@ -708,6 +729,15 @@ func (h *FSMHandler) opensent() (bgp.FSMState, FsmStateReason) { "Key": fsm.pConf.Config.NeighborAddress, "State": fsm.state, }).Warn("Closed an accepted connection") + case <-fsm.gracefulRestartTimer.C: + if fsm.pConf.GracefulRestart.State.PeerRestarting { + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": fsm.pConf.Config.NeighborAddress, + "State": fsm.state, + }).Warn("graceful restart timer expired") + return bgp.BGP_FSM_IDLE, FSM_RESTART_TIMER_EXPIRED + } case e := <-h.msgCh: switch e.MsgData.(type) { case *bgp.BGPMessage: @@ -868,6 +898,15 @@ func (h *FSMHandler) openconfirm() (bgp.FSMState, FsmStateReason) { "Key": fsm.pConf.Config.NeighborAddress, "State": fsm.state, }).Warn("Closed an accepted connection") + case <-fsm.gracefulRestartTimer.C: + if fsm.pConf.GracefulRestart.State.PeerRestarting { + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": fsm.pConf.Config.NeighborAddress, + "State": fsm.state, + }).Warn("graceful restart timer expired") + return bgp.BGP_FSM_IDLE, FSM_RESTART_TIMER_EXPIRED + } case <-ticker.C: m := bgp.NewBGPKeepAliveMessage() b, _ := m.Serialize() @@ -1031,6 +1070,8 @@ func (h *FSMHandler) established() (bgp.FSMState, FsmStateReason) { holdTimer = time.NewTimer(time.Second * time.Duration(fsm.pConf.Timers.State.NegotiatedHoldTime)) } + fsm.gracefulRestartTimer.Stop() + for { select { case <-h.t.Dying(): @@ -1048,6 +1089,15 @@ func (h *FSMHandler) established() (bgp.FSMState, FsmStateReason) { case err := <-h.errorCh: h.conn.Close() h.t.Kill(nil) + if s := fsm.pConf.GracefulRestart.State; s.Enabled && (err == FSM_READ_FAILED || err == FSM_WRITE_FAILED) { + err = FSM_GRACEFUL_RESTART + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": fsm.pConf.Config.NeighborAddress, + "State": fsm.state, + }).Info("peer graceful restart") + fsm.gracefulRestartTimer.Reset(time.Duration(fsm.pConf.GracefulRestart.State.PeerRestartTime) * time.Second) + } return bgp.BGP_FSM_IDLE, err case <-holdTimer.C: log.WithFields(log.Fields{ diff --git a/server/peer.go b/server/peer.go index da76bf93..ee9c8bdd 100644 --- a/server/peer.go +++ b/server/peer.go @@ -97,11 +97,45 @@ func (peer *Peer) isGracefulRestartEnabled() bool { return peer.fsm.pConf.GracefulRestart.State.Enabled } +func (peer *Peer) recvedAllEOR() bool { + for _, a := range peer.fsm.pConf.AfiSafis { + if s := a.MpGracefulRestart.State; s.Enabled && !s.EndOfRibReceived { + return false + } + } + return true +} + func (peer *Peer) configuredRFlist() []bgp.RouteFamily { rfs, _ := config.AfiSafis(peer.conf.AfiSafis).ToRfList() return rfs } +func (peer *Peer) forwardingPreservedFamilies() ([]bgp.RouteFamily, []bgp.RouteFamily) { + list := []bgp.RouteFamily{} + for _, a := range peer.fsm.pConf.AfiSafis { + if s := a.MpGracefulRestart.State; s.Enabled && s.Received { + f, _ := bgp.GetRouteFamily(string(a.AfiSafiName)) + list = append(list, f) + } + } + preserved := []bgp.RouteFamily{} + notPreserved := []bgp.RouteFamily{} + for _, f := range peer.configuredRFlist() { + p := true + for _, g := range list { + if f == g { + p = false + preserved = append(preserved, f) + } + } + if p { + notPreserved = append(notPreserved, f) + } + } + return preserved, notPreserved +} + func (peer *Peer) getAccepted(rfList []bgp.RouteFamily) []*table.Path { return peer.adjRibIn.PathList(rfList, true) } @@ -206,6 +240,10 @@ func (peer *Peer) startFSMHandler(incoming, stateCh chan *FsmMsg) { peer.fsm.h = NewFSMHandler(peer.fsm, incoming, stateCh, peer.outgoing) } +func (peer *Peer) StaleAll(rfList []bgp.RouteFamily) { + peer.adjRibIn.StaleAll(rfList) +} + func (peer *Peer) PassConn(conn *net.TCPConn) { select { case peer.fsm.connCh <- conn: diff --git a/server/server.go b/server/server.go index a833fa7f..f9976624 100644 --- a/server/server.go +++ b/server/server.go @@ -452,7 +452,7 @@ func (server *BgpServer) Serve() { t.Stop() }(addr) - m := server.dropPeerAllRoutes(peer) + m := server.dropPeerAllRoutes(peer, peer.configuredRFlist()) if len(m) > 0 { senderMsgs = append(senderMsgs, m...) } @@ -580,11 +580,11 @@ func filterpath(peer *Peer, path *table.Path) *table.Path { return path } -func (server *BgpServer) dropPeerAllRoutes(peer *Peer) []*SenderMsg { +func (server *BgpServer) dropPeerAllRoutes(peer *Peer, families []bgp.RouteFamily) []*SenderMsg { msgs := make([]*SenderMsg, 0) options := &table.PolicyOptions{} - for _, rf := range peer.configuredRFlist() { + for _, rf := range families { dsts := server.globalRib.DeletePathsByPeer(peer.fsm.peerInfo, rf) server.validatePaths(dsts, true) if peer.isRouteServerClient() { @@ -957,10 +957,26 @@ func (server *BgpServer) handleFSMMessage(peer *Peer, e *FsmMsg) []*SenderMsg { if t.Sub(time.Unix(peer.conf.Timers.State.Uptime, 0)) < FLOP_THRESHOLD { peer.conf.State.Flops++ } - + var drop []bgp.RouteFamily + if peer.fsm.reason == FSM_GRACEFUL_RESTART { + peer.fsm.pConf.GracefulRestart.State.PeerRestarting = true + var p []bgp.RouteFamily + p, drop = peer.forwardingPreservedFamilies() + peer.StaleAll(p) + } else { + drop = peer.configuredRFlist() + } + peer.DropAll(drop) + msgs = append(msgs, server.dropPeerAllRoutes(peer, drop)...) + } else if peer.fsm.pConf.GracefulRestart.State.PeerRestarting && nextState == bgp.BGP_FSM_IDLE { + // RFC 4724 4.2 + // If the session does not get re-established within the "Restart Time" + // that the peer advertised previously, the Receiving Speaker MUST + // delete all the stale routes from the peer that it is retaining. + peer.fsm.pConf.GracefulRestart.State.PeerRestarting = false peer.DropAll(peer.configuredRFlist()) - msgs = append(msgs, server.dropPeerAllRoutes(peer)...) + msgs = append(msgs, server.dropPeerAllRoutes(peer, peer.configuredRFlist())...) } close(peer.outgoing) @@ -1072,6 +1088,14 @@ func (server *BgpServer) handleFSMMessage(peer *Peer, e *FsmMsg) []*SenderMsg { } if len(eor) > 0 { + for _, f := range eor { + for i, a := range peer.fsm.pConf.AfiSafis { + if g, _ := bgp.GetRouteFamily(string(a.AfiSafiName)); f == g { + peer.fsm.pConf.AfiSafis[i].MpGracefulRestart.State.EndOfRibReceived = true + } + } + } + // RFC 4724 4.1 // Once the session between the Restarting Speaker and the Receiving // Speaker is re-established, ...snip... it MUST defer route @@ -1080,33 +1104,17 @@ func (server *BgpServer) handleFSMMessage(peer *Peer, e *FsmMsg) []*SenderMsg { // "Restart State" bit set in the received capability and excluding the // ones that do not advertise the graceful restart capability) or ...snip... if peer.fsm.pConf.GracefulRestart.State.LocalRestarting { - var end bool - for _, f := range eor { - end = true - for i, a := range peer.fsm.pConf.AfiSafis { - if g, _ := bgp.GetRouteFamily(string(a.AfiSafiName)); f == g { - peer.fsm.pConf.AfiSafis[i].MpGracefulRestart.State.EndOfRibReceived = true - } - if s := a.MpGracefulRestart.State; s.Enabled && !s.EndOfRibReceived { - end = false + allEnd := func() bool { + for _, p := range server.neighborMap { + if !p.recvedAllEOR() { + return false } } - } - if end { - log.WithFields(log.Fields{ - "Topic": "Peer", - "Key": peer.conf.Config.NeighborAddress, - }).Debug("all family's EOR received") - peer.fsm.pConf.GracefulRestart.State.LocalRestarting = false - } - allEnd := true - for _, p := range server.neighborMap { - if p.fsm.pConf.GracefulRestart.State.LocalRestarting { - allEnd = false - } - } + return true + }() if allEnd { for _, p := range server.neighborMap { + p.fsm.pConf.GracefulRestart.State.LocalRestarting = false if !p.isGracefulRestartEnabled() { continue } @@ -1121,6 +1129,18 @@ func (server *BgpServer) handleFSMMessage(peer *Peer, e *FsmMsg) []*SenderMsg { }).Info("sync finished") } } + if peer.fsm.pConf.GracefulRestart.State.PeerRestarting { + if peer.recvedAllEOR() { + peer.fsm.pConf.GracefulRestart.State.PeerRestarting = false + pathList := peer.adjRibIn.DropStale(peer.configuredRFlist()) + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": peer.conf.Config.NeighborAddress, + }).Debugf("withdraw %d stale routes", len(pathList)) + m, _ := server.propagateUpdate(peer, pathList) + msgs = append(msgs, m...) + } + } } default: log.WithFields(log.Fields{ @@ -2381,7 +2401,7 @@ func (server *BgpServer) handleGrpcModNeighbor(grpcReq *GrpcRequest) (sMsgs []*S n.fsm.t.Wait() t.Stop() }(addr) - m := server.dropPeerAllRoutes(n) + m := server.dropPeerAllRoutes(n, n.configuredRFlist()) if len(m) > 0 { sMsgs = append(sMsgs, m...) } -- cgit v1.2.3