diff options
Diffstat (limited to 'server')
-rw-r--r-- | server/fsm.go | 32 | ||||
-rw-r--r-- | server/peer.go | 120 | ||||
-rw-r--r-- | server/server.go | 79 |
3 files changed, 206 insertions, 25 deletions
diff --git a/server/fsm.go b/server/fsm.go index d86a5ff1..c8ae658b 100644 --- a/server/fsm.go +++ b/server/fsm.go @@ -27,6 +27,7 @@ import ( "math/rand" "net" "strconv" + "strings" "time" ) @@ -521,6 +522,7 @@ func capabilitiesFromConfig(pConf *config.Neighbor) []bgp.ParameterCapabilityInt if c := pConf.GracefulRestart.Config; c.Enabled { tuples := []*bgp.CapGracefulRestartTuple{} + ltuples := []*bgp.CapLongLivedGracefulRestartTuple{} // RFC 4724 4.1 // To re-establish the session with its peer, the Restarting Speaker @@ -530,8 +532,8 @@ func capabilitiesFromConfig(pConf *config.Neighbor) []bgp.ParameterCapabilityInt if !c.HelperOnly { for i, rf := range pConf.AfiSafis { - if rf.MpGracefulRestart.Config.Enabled { - k, _ := bgp.GetRouteFamily(string(rf.Config.AfiSafiName)) + k, _ := bgp.GetRouteFamily(string(rf.Config.AfiSafiName)) + if m := rf.MpGracefulRestart.Config; m.Enabled { // When restarting, always flag forwaring bit. // This can be a lie, depending on how gobgpd is used. // For a route-server use-case, since a route-server @@ -542,11 +544,17 @@ func capabilitiesFromConfig(pConf *config.Neighbor) []bgp.ParameterCapabilityInt tuples = append(tuples, bgp.NewCapGracefulRestartTuple(k, restarting)) pConf.AfiSafis[i].MpGracefulRestart.State.Advertised = true } + if m := rf.LongLivedGracefulRestart.Config; m.Enabled { + ltuples = append(ltuples, bgp.NewCapLongLivedGracefulRestartTuple(k, restarting, m.RestartTime)) + } } } time := c.RestartTime notification := c.NotificationEnabled caps = append(caps, bgp.NewCapGracefulRestart(restarting, notification, time, tuples)) + if c.LongLivedEnabled { + caps = append(caps, bgp.NewCapLongLivedGracefulRestart(ltuples)) + } } return caps } @@ -695,7 +703,7 @@ func (h *FSMHandler) recvMessageWithError() (*FsmMsg, error) { "Subcode": body.ErrorSubcode, "Data": body.Data, }).Warn("received notification") - if body.ErrorCode == bgp.BGP_ERROR_CEASE && body.ErrorSubcode == bgp.BGP_ERROR_SUB_HARD_RESET { + if s := h.fsm.pConf.GracefulRestart.State; s.Enabled && s.NotificationEnabled && body.ErrorCode == bgp.BGP_ERROR_CEASE && body.ErrorSubcode == bgp.BGP_ERROR_SUB_HARD_RESET { sendToErrorCh(FSM_HARD_RESET) } else { sendToErrorCh(FsmStateReason(fmt.Sprintf("%s %s", FSM_NOTIFICATION_RECV, bgp.NewNotificationErrorCode(body.ErrorCode, body.ErrorSubcode).String()))) @@ -866,6 +874,22 @@ func (h *FSMHandler) opensent() (bgp.FSMState, FsmStateReason) { fsm.pConf.GracefulRestart.State.NotificationEnabled = true } } + llgr, ok2 := fsm.capMap[bgp.BGP_CAP_LONG_LIVED_GRACEFUL_RESTART] + if fsm.pConf.GracefulRestart.Config.LongLivedEnabled && ok && ok2 { + fsm.pConf.GracefulRestart.State.LongLivedEnabled = true + cap := llgr[len(llgr)-1].(*bgp.CapLongLivedGracefulRestart) + for _, t := range cap.Tuples { + n := bgp.AddressFamilyNameMap[bgp.AfiSafiToRouteFamily(t.AFI, t.SAFI)] + for i, a := range fsm.pConf.AfiSafis { + if string(a.Config.AfiSafiName) == n { + fsm.pConf.AfiSafis[i].LongLivedGracefulRestart.State.Enabled = true + fsm.pConf.AfiSafis[i].LongLivedGracefulRestart.State.Received = true + fsm.pConf.AfiSafis[i].LongLivedGracefulRestart.State.PeerRestartTime = t.RestartTime + break + } + } + } + } msg := bgp.NewBGPKeepAliveMessage() b, _ := msg.Serialize() @@ -1178,7 +1202,7 @@ func (h *FSMHandler) established() (bgp.FSMState, FsmStateReason) { case err := <-h.errorCh: h.conn.Close() h.t.Kill(nil) - if s := fsm.pConf.GracefulRestart.State; s.Enabled && ((s.NotificationEnabled && err != FSM_HARD_RESET) || err == FSM_READ_FAILED || err == FSM_WRITE_FAILED) { + if s := fsm.pConf.GracefulRestart.State; s.Enabled && ((s.NotificationEnabled && strings.HasPrefix(string(err), FSM_NOTIFICATION_RECV)) || err == FSM_READ_FAILED || err == FSM_WRITE_FAILED) { err = FSM_GRACEFUL_RESTART log.WithFields(log.Fields{ "Topic": "Peer", diff --git a/server/peer.go b/server/peer.go index 2feb4d86..e9d0fe3e 100644 --- a/server/peer.go +++ b/server/peer.go @@ -40,6 +40,7 @@ type Peer struct { policy *table.RoutingPolicy localRib *table.TableManager prefixLimitWarned map[bgp.RouteFamily]bool + llgrEndChs []chan struct{} } func NewPeer(g *config.Global, conf *config.Neighbor, loc *table.TableManager, policy *table.RoutingPolicy) *Peer { @@ -99,6 +100,25 @@ func (peer *Peer) configuredRFlist() []bgp.RouteFamily { return rfs } +func classifyFamilies(all, part []bgp.RouteFamily) ([]bgp.RouteFamily, []bgp.RouteFamily) { + a := []bgp.RouteFamily{} + b := []bgp.RouteFamily{} + for _, f := range all { + p := true + for _, g := range part { + if f == g { + p = false + a = append(a, f) + break + } + } + if p { + b = append(b, f) + } + } + return a, b +} + func (peer *Peer) forwardingPreservedFamilies() ([]bgp.RouteFamily, []bgp.RouteFamily) { list := []bgp.RouteFamily{} for _, a := range peer.fsm.pConf.AfiSafis { @@ -107,21 +127,83 @@ func (peer *Peer) forwardingPreservedFamilies() ([]bgp.RouteFamily, []bgp.RouteF list = append(list, f) } } - preserved := []bgp.RouteFamily{} - notPreserved := []bgp.RouteFamily{} - for _, f := range peer.configuredRFlist() { - p := true - for _, g := range list { - if f == g { - p = false - preserved = append(preserved, f) + return classifyFamilies(peer.configuredRFlist(), list) +} + +func (peer *Peer) llgrFamilies() ([]bgp.RouteFamily, []bgp.RouteFamily) { + list := []bgp.RouteFamily{} + for _, a := range peer.fsm.pConf.AfiSafis { + if a.LongLivedGracefulRestart.State.Enabled { + f, _ := bgp.GetRouteFamily(string(a.Config.AfiSafiName)) + list = append(list, f) + } + } + return classifyFamilies(peer.configuredRFlist(), list) +} + +func (peer *Peer) isLLGREnabledFamily(family bgp.RouteFamily) bool { + if !peer.fsm.pConf.GracefulRestart.Config.LongLivedEnabled { + return false + } + fs, _ := peer.llgrFamilies() + for _, f := range fs { + if f == family { + return true + } + } + return false +} + +func (peer *Peer) llgrRestartTime(family bgp.RouteFamily) uint32 { + for _, a := range peer.fsm.pConf.AfiSafis { + if f, _ := bgp.GetRouteFamily(string(a.Config.AfiSafiName)); f == family { + return a.LongLivedGracefulRestart.State.PeerRestartTime + } + } + return 0 +} + +func (peer *Peer) llgrRestartTimerExpired(family bgp.RouteFamily) bool { + all := true + for _, a := range peer.fsm.pConf.AfiSafis { + if f, _ := bgp.GetRouteFamily(string(a.Config.AfiSafiName)); f == family { + a.LongLivedGracefulRestart.State.PeerRestartTimerExpired = true + } + s := a.LongLivedGracefulRestart.State + if s.Received && !s.PeerRestartTimerExpired { + all = false + } + } + return all +} + +func (peer *Peer) markLLGRStale(fs []bgp.RouteFamily) []*table.Path { + paths := peer.adjRibIn.PathList(fs, true) + for i, p := range paths { + doStale := true + for _, c := range p.GetCommunities() { + if c == bgp.COMMUNITY_NO_LLGR { + doStale = false + p = p.Clone(true) + break } } - if p { - notPreserved = append(notPreserved, f) + if doStale { + p = p.Clone(false) + p.SetCommunities([]uint32{bgp.COMMUNITY_LLGR_STALE}, false) } + paths[i] = p } - return preserved, notPreserved + return paths +} + +func (peer *Peer) stopPeerRestarting() { + peer.fsm.pConf.GracefulRestart.State.PeerRestarting = false + for _, ch := range peer.llgrEndChs { + close(ch) + } + peer.llgrEndChs = make([]chan struct{}, 0) + } func (peer *Peer) getAccepted(rfList []bgp.RouteFamily) []*table.Path { @@ -159,6 +241,22 @@ func (peer *Peer) filterpath(path *table.Path, withdrawals []*table.Path) *table } path = peer.policy.ApplyPolicy(peer.TableID(), table.POLICY_DIRECTION_EXPORT, path, options) + // draft-uttaro-idr-bgp-persistence-02 + // 4.3. Processing LLGR_STALE Routes + // + // The route SHOULD NOT be advertised to any neighbor from which the + // Long-lived Graceful Restart Capability has not been received. The + // exception is described in the Optional Partial Deployment + // Procedure section (Section 4.7). Note that this requirement + // implies that such routes should be withdrawn from any such neighbor. + if path != nil && !path.IsWithdraw && !peer.isLLGREnabledFamily(path.GetRouteFamily()) && path.IsLLGRStale() { + if peer.adjRibOut.Exists(path) { + path = path.Clone(true) + } else { + return nil + } + } + // remove local-pref attribute // we should do this after applying export policy since policy may // set local-preference diff --git a/server/server.go b/server/server.go index 1d36720b..f92b8be8 100644 --- a/server/server.go +++ b/server/server.go @@ -628,13 +628,72 @@ func (server *BgpServer) handleFSMMessage(peer *Peer, e *FsmMsg) { peer.DropAll(drop) server.dropPeerAllRoutes(peer, drop) } else if peer.fsm.pConf.GracefulRestart.State.PeerRestarting && nextState == bgp.BGP_FSM_IDLE { - // RFC 4724 4.2 - // If the session does not get re-established within the "Restart Time" - // that the peer advertised previously, the Receiving Speaker MUST - // delete all the stale routes from the peer that it is retaining. - peer.fsm.pConf.GracefulRestart.State.PeerRestarting = false - peer.DropAll(peer.configuredRFlist()) - server.dropPeerAllRoutes(peer, peer.configuredRFlist()) + if peer.fsm.pConf.GracefulRestart.State.LongLivedEnabled { + llgr, no_llgr := peer.llgrFamilies() + + peer.DropAll(no_llgr) + server.dropPeerAllRoutes(peer, no_llgr) + + // attach LLGR_STALE community to paths in peer's adj-rib-in + // paths with NO_LLGR are deleted + pathList := peer.markLLGRStale(llgr) + + // calculate again + // wheh path with LLGR_STALE chosen as best, + // peer which doesn't support LLGR will drop the path + // if it is in adj-rib-out, do withdrawal + server.propagateUpdate(peer, pathList) + + for _, f := range llgr { + endCh := make(chan struct{}) + peer.llgrEndChs = append(peer.llgrEndChs, endCh) + go func(family bgp.RouteFamily, endCh chan struct{}) { + t := peer.llgrRestartTime(family) + timer := time.NewTimer(time.Second * time.Duration(t)) + + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": peer.ID(), + "Family": family, + }).Debugf("start LLGR restart timer (%d sec) for %s", t, family) + + select { + case <-timer.C: + ch := make(chan struct{}) + defer func() { <-ch }() + server.mgmtCh <- func() { + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": peer.ID(), + "Family": family, + }).Debugf("LLGR restart timer (%d sec) for %s expired", t, family) + defer close(ch) + peer.DropAll([]bgp.RouteFamily{family}) + server.dropPeerAllRoutes(peer, []bgp.RouteFamily{family}) + + // when all llgr restart timer expired, stop PeerRestarting + if peer.llgrRestartTimerExpired(family) { + peer.stopPeerRestarting() + } + } + case <-endCh: + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": peer.ID(), + "Family": family, + }).Debugf("stop LLGR restart timer (%d sec) for %s", t, family) + } + }(f, endCh) + } + } else { + // RFC 4724 4.2 + // If the session does not get re-established within the "Restart Time" + // that the peer advertised previously, the Receiving Speaker MUST + // delete all the stale routes from the peer that it is retaining. + peer.fsm.pConf.GracefulRestart.State.PeerRestarting = false + peer.DropAll(peer.configuredRFlist()) + server.dropPeerAllRoutes(peer, peer.configuredRFlist()) + } } peer.outgoing.Close() @@ -651,8 +710,7 @@ func (server *BgpServer) handleFSMMessage(peer *Peer, e *FsmMsg) { server.mgmtCh <- func() { defer close(ch) - - server.softResetOut(peer.fsm.pConf.Config.NeighborAddress, bgp.RouteFamily(0), true) + server.softResetOut(peer.fsm.pConf.Config.NeighborAddress, family, true) } } } @@ -832,7 +890,7 @@ func (server *BgpServer) handleFSMMessage(peer *Peer, e *FsmMsg) { } if peer.fsm.pConf.GracefulRestart.State.PeerRestarting { if peer.recvedAllEOR() { - peer.fsm.pConf.GracefulRestart.State.PeerRestarting = false + peer.stopPeerRestarting() pathList := peer.adjRibIn.DropStale(peer.configuredRFlist()) log.WithFields(log.Fields{ "Topic": "Peer", @@ -1613,6 +1671,7 @@ func (server *BgpServer) deleteNeighbor(c *config.Neighbor, code, subcode uint8) }).Infof("Delete a peer configuration for:%s", addr) n.fsm.sendNotification(code, subcode, nil, "") + n.stopPeerRestarting() go func(addr string) { t1 := time.AfterFunc(time.Minute*5, func() { |