diff options
-rw-r--r-- | .travis.yml | 6 | ||||
-rw-r--r-- | config/bgp_configs.go | 97 | ||||
-rw-r--r-- | docs/sources/configuration.md | 12 | ||||
-rw-r--r-- | docs/sources/graceful-restart.md | 99 | ||||
-rw-r--r-- | gobgp/cmd/neighbor.go | 35 | ||||
-rw-r--r-- | server/fsm.go | 32 | ||||
-rw-r--r-- | server/peer.go | 120 | ||||
-rw-r--r-- | server/server.go | 79 | ||||
-rw-r--r-- | table/destination.go | 17 | ||||
-rw-r--r-- | table/path.go | 9 | ||||
-rw-r--r-- | test/lib/base.py | 5 | ||||
-rw-r--r-- | test/lib/gobgp.py | 6 | ||||
-rw-r--r-- | test/scenario_test/long_lived_graceful_restart_test.py | 170 | ||||
-rw-r--r-- | tools/pyang_plugins/gobgp.yang | 38 |
14 files changed, 695 insertions, 30 deletions
diff --git a/.travis.yml b/.travis.yml index 7de95b3e..343c8613 100644 --- a/.travis.yml +++ b/.travis.yml @@ -134,7 +134,11 @@ matrix: sudo: required services: - docker - + - env: + - TEST=long_lived_graceful_restart_test.py + sudo: required + services: + - docker cache: pip: true diff --git a/config/bgp_configs.go b/config/bgp_configs.go index f56a3750..d526184d 100644 --- a/config/bgp_configs.go +++ b/config/bgp_configs.go @@ -2802,6 +2802,86 @@ func (lhs *MplsLabelRange) Equal(rhs *MplsLabelRange) bool { } //struct for container gobgp:state +type LongLivedGracefulRestartState struct { + // original -> gobgp:enabled + //gobgp:enabled's original type is boolean + Enabled bool `mapstructure:"enabled"` + // original -> gobgp:received + //gobgp:received's original type is boolean + Received bool `mapstructure:"received"` + // original -> gobgp:advertised + //gobgp:advertised's original type is boolean + Advertised bool `mapstructure:"advertised"` + // original -> gobgp:peer-restart-time + PeerRestartTime uint32 `mapstructure:"peer-restart-time"` + // original -> gobgp:peer-restart-timer-expired + //gobgp:peer-restart-timer-expired's original type is boolean + PeerRestartTimerExpired bool `mapstructure:"peer-restart-timer-expired"` +} + +func (lhs *LongLivedGracefulRestartState) Equal(rhs *LongLivedGracefulRestartState) bool { + if lhs == nil || rhs == nil { + return false + } + if lhs.Enabled != rhs.Enabled { + return false + } + if lhs.Received != rhs.Received { + return false + } + if lhs.Advertised != rhs.Advertised { + return false + } + if lhs.PeerRestartTime != rhs.PeerRestartTime { + return false + } + if lhs.PeerRestartTimerExpired != rhs.PeerRestartTimerExpired { + return false + } + return true +} + +//struct for container gobgp:config +type LongLivedGracefulRestartConfig struct { + // original -> gobgp:enabled + //gobgp:enabled's original type is boolean + Enabled bool `mapstructure:"enabled"` + // original -> gobgp:restart-time + RestartTime uint32 
`mapstructure:"restart-time"` +} + +func (lhs *LongLivedGracefulRestartConfig) Equal(rhs *LongLivedGracefulRestartConfig) bool { + if lhs == nil || rhs == nil { + return false + } + if lhs.Enabled != rhs.Enabled { + return false + } + if lhs.RestartTime != rhs.RestartTime { + return false + } + return true +} + +//struct for container gobgp:long-lived-graceful-restart +type LongLivedGracefulRestart struct { + // original -> gobgp:long-lived-graceful-restart-config + Config LongLivedGracefulRestartConfig `mapstructure:"config"` + // original -> gobgp:long-lived-graceful-restart-state + State LongLivedGracefulRestartState `mapstructure:"state"` +} + +func (lhs *LongLivedGracefulRestart) Equal(rhs *LongLivedGracefulRestart) bool { + if lhs == nil || rhs == nil { + return false + } + if !lhs.Config.Equal(&(rhs.Config)) { + return false + } + return true +} + +//struct for container gobgp:state type RouteTargetMembershipState struct { // original -> gobgp:deferral-time DeferralTime uint16 `mapstructure:"deferral-time"` @@ -3464,6 +3544,8 @@ type AfiSafi struct { PrefixLimit PrefixLimit `mapstructure:"prefix-limit"` // original -> gobgp:route-target-membership RouteTargetMembership RouteTargetMembership `mapstructure:"route-target-membership"` + // original -> gobgp:long-lived-graceful-restart + LongLivedGracefulRestart LongLivedGracefulRestart `mapstructure:"long-lived-graceful-restart"` } func (lhs *AfiSafi) Equal(rhs *AfiSafi) bool { @@ -3521,6 +3603,9 @@ func (lhs *AfiSafi) Equal(rhs *AfiSafi) bool { if !lhs.RouteTargetMembership.Equal(&(rhs.RouteTargetMembership)) { return false } + if !lhs.LongLivedGracefulRestart.Equal(&(rhs.LongLivedGracefulRestart)) { + return false + } return true } @@ -3552,6 +3637,9 @@ type GracefulRestartState struct { // original -> gobgp:notification-enabled //gobgp:notification-enabled's original type is boolean NotificationEnabled bool `mapstructure:"notification-enabled"` + // original -> gobgp:long-lived-enabled + 
//gobgp:long-lived-enabled's original type is boolean + LongLivedEnabled bool `mapstructure:"long-lived-enabled"` } func (lhs *GracefulRestartState) Equal(rhs *GracefulRestartState) bool { @@ -3588,6 +3676,9 @@ func (lhs *GracefulRestartState) Equal(rhs *GracefulRestartState) bool { if lhs.NotificationEnabled != rhs.NotificationEnabled { return false } + if lhs.LongLivedEnabled != rhs.LongLivedEnabled { + return false + } return true } @@ -3609,6 +3700,9 @@ type GracefulRestartConfig struct { // original -> gobgp:notification-enabled //gobgp:notification-enabled's original type is boolean NotificationEnabled bool `mapstructure:"notification-enabled"` + // original -> gobgp:long-lived-enabled + //gobgp:long-lived-enabled's original type is boolean + LongLivedEnabled bool `mapstructure:"long-lived-enabled"` } func (lhs *GracefulRestartConfig) Equal(rhs *GracefulRestartConfig) bool { @@ -3633,6 +3727,9 @@ func (lhs *GracefulRestartConfig) Equal(rhs *GracefulRestartConfig) bool { if lhs.NotificationEnabled != rhs.NotificationEnabled { return false } + if lhs.LongLivedEnabled != rhs.LongLivedEnabled { + return false + } return true } diff --git a/docs/sources/configuration.md b/docs/sources/configuration.md index d383023a..56c14a78 100644 --- a/docs/sources/configuration.md +++ b/docs/sources/configuration.md @@ -63,12 +63,24 @@ [neighbors.route-reflector.config] route-reflector-client = true route-reflector-cluster-id = "192.168.0.1" + [neighbors.graceful-restart.config] + enabled = true + notification-enabled = true + long-lived-enabled = true + # graceful restart restart time + restart-time = 20 [[neighbors.afi-safis]] [neighbors.afi-safis.config] afi-safi-name = "ipv4-unicast" [neighbors.afi-safis.prefix-limit.config] max-prefixes = 1000 shutdown-threshold-pct = 80 + [neighbors.afi-safis.mp-graceful-restart.config] + enabled = true + [neighbors.afi-safis.long-lived-graceful-restart.config] + enabled = true + # long lived graceful restart restart time + restart-time 
= 100000 [[neighbors.afi-safis]] [neighbors.afi-safis.config] afi-safi-name = "ipv6-unicast" diff --git a/docs/sources/graceful-restart.md b/docs/sources/graceful-restart.md index 6090eaec..5abeecc9 100644 --- a/docs/sources/graceful-restart.md +++ b/docs/sources/graceful-restart.md @@ -1,6 +1,8 @@ # Graceful Restart -This page explains how to configure [Graceful Restart](https://tools.ietf.org/html/rfc4724). +This page explains how to configure [Graceful Restart](https://tools.ietf.org/html/rfc4724), +[Graceful Restart Notification Support](https://tools.ietf.org/html/draft-ietf-idr-bgp-gr-notification-07) and +[Long Lived Graceful Restart](https://tools.ietf.org/html/draft-uttaro-idr-bgp-persistence-02). Graceful Restart has two sides. One is restarting speaker which does restart, the other is receiving speaker (helper speaker) which helps a restarting speaker to do graceful restart. GoBGP supports both roles. @@ -9,6 +11,8 @@ to do graceful restart. GoBGP supports both roles. - [Helper speaker](#helper) - [Restarting speaker](#restarting) +- [Graceful Restart Notification Support](#notification) +- [Long Lived Graceful Restart](#long-lived) ## <a name="helper"> Helper speaker @@ -140,3 +144,96 @@ immediately withdraw all routes which were advertised from `gobgpd`. Also, when `gobgpd` doesn't recover within `restart-time`, the peers will withdraw all routes. Default value of `restart-time` is equal to `hold-time`. + +## <a name="notification"> Graceful Restart Notification Support + +[RFC4724](https://tools.ietf.org/html/rfc4724) specifies graceful restart procedures are triggered only when +the BGP session between graceful restart capable peers turns down without +a notification message for backward compatibility. +[Graceful Restart Notification Support](https://tools.ietf.org/html/draft-ietf-idr-bgp-gr-notification-07) +expands this to trigger graceful restart procedures also with a notification message. 
+To turn on this feature, add `notification-enabled = true` to configuration like below. + +```toml +[global.config] + as = 64512 + router-id = "192.168.255.1" + +[[neighbors]] + [neighbors.config] + neighbor-address = "10.0.255.1" + peer-as = 65001 + [neighbors.graceful-restart.config] + enabled = true + notification-enabled = true +``` + +## <a name="long-lived"> Long Lived Graceful Restart + +### Long Lived Graceful Restart Helper Speaker Configuration + +```toml +[global.config] + as = 64512 + router-id = "192.168.255.1" + +[[neighbors]] + [neighbors.config] + neighbor-address = "10.0.255.1" + peer-as = 65001 + [neighbors.graceful-restart.config] + enabled = true + long-lived-enabled = true +``` + +### Long Lived Graceful Restart Restarting Speaker Configuration + +Unlike normal graceful restart, long-lived graceful restart supports +restart-time as per address family. + +```toml +[global.config] + as = 64512 + router-id = "192.168.255.1" + +[[neighbors]] + [neighbors.config] + neighbor-address = "10.0.255.1" + peer-as = 65001 + [neighbors.graceful-restart.config] + enabled = true + long-lived-enabled = true + [[neighbors.afi-safis]] + [neighbors.afi-safis.config] + afi-safi-name = "ipv4-unicast" + [neighbors.afi-safis.long-lived-graceful-restart.config] + enabled = true + restart-time = 100000 +``` + +### Combination with normal Graceful Restart + +You can also use long lived graceful restart with normal graceful restart. 
+ +```toml +[global.config] + as = 64512 + router-id = "192.168.255.1" + +[[neighbors]] + [neighbors.config] + neighbor-address = "10.0.255.1" + peer-as = 65001 + [neighbors.graceful-restart.config] + enabled = true + long-lived-enabled = true + restart-time = 120 + [[neighbors.afi-safis]] + [neighbors.afi-safis.config] + afi-safi-name = "ipv4-unicast" + [neighbors.afi-safis.mp-graceful-restart.config] + enabled = true + [neighbors.afi-safis.long-lived-graceful-restart.config] + enabled = true + restart-time = 100000 +``` diff --git a/gobgp/cmd/neighbor.go b/gobgp/cmd/neighbor.go index 84cb8d26..eea7e423 100644 --- a/gobgp/cmd/neighbor.go +++ b/gobgp/cmd/neighbor.go @@ -243,12 +243,19 @@ func showNeighbor(args []string) error { if len(g.Tuples) > 0 { str += fmt.Sprintf("restart time %d sec", g.Time) } - if g.Flags == 0x08 { + if g.Flags&0x08 > 0 { if len(str) > 0 { str += ", " } str += "restart flag set" } + if g.Flags&0x04 > 0 { + if len(str) > 0 { + str += ", " + } + str += "notification flag set" + } + if len(str) > 0 { str += "\n" } @@ -273,6 +280,32 @@ func showNeighbor(args []string) error { fmt.Printf(" Remote: %s", s) } } + case bgp.BGP_CAP_LONG_LIVED_GRACEFUL_RESTART: + fmt.Printf(" %s:\t%s\n", c.Code(), support) + grStr := func(g *bgp.CapLongLivedGracefulRestart) string { + var str string + for _, t := range g.Tuples { + str += fmt.Sprintf(" %s, restart time %d sec", bgp.AfiSafiToRouteFamily(t.AFI, t.SAFI), t.RestartTime) + if t.Flags == 0x80 { + str += ", forward flag set" + } + str += "\n" + } + return str + } + if m := lookup(c, p.Conf.LocalCap); m != nil { + g := m.(*bgp.CapLongLivedGracefulRestart) + if s := grStr(g); len(s) > 0 { + fmt.Printf(" Local:\n%s", s) + } + } + if m := lookup(c, p.Conf.RemoteCap); m != nil { + g := m.(*bgp.CapLongLivedGracefulRestart) + if s := grStr(g); len(s) > 0 { + fmt.Printf(" Remote:\n%s", s) + } + } + default: fmt.Printf(" %s:\t%s\n", c.Code(), support) } diff --git a/server/fsm.go b/server/fsm.go index 
d86a5ff1..c8ae658b 100644 --- a/server/fsm.go +++ b/server/fsm.go @@ -27,6 +27,7 @@ import ( "math/rand" "net" "strconv" + "strings" "time" ) @@ -521,6 +522,7 @@ func capabilitiesFromConfig(pConf *config.Neighbor) []bgp.ParameterCapabilityInt if c := pConf.GracefulRestart.Config; c.Enabled { tuples := []*bgp.CapGracefulRestartTuple{} + ltuples := []*bgp.CapLongLivedGracefulRestartTuple{} // RFC 4724 4.1 // To re-establish the session with its peer, the Restarting Speaker @@ -530,8 +532,8 @@ func capabilitiesFromConfig(pConf *config.Neighbor) []bgp.ParameterCapabilityInt if !c.HelperOnly { for i, rf := range pConf.AfiSafis { - if rf.MpGracefulRestart.Config.Enabled { - k, _ := bgp.GetRouteFamily(string(rf.Config.AfiSafiName)) + k, _ := bgp.GetRouteFamily(string(rf.Config.AfiSafiName)) + if m := rf.MpGracefulRestart.Config; m.Enabled { // When restarting, always flag forwaring bit. // This can be a lie, depending on how gobgpd is used. // For a route-server use-case, since a route-server @@ -542,11 +544,17 @@ func capabilitiesFromConfig(pConf *config.Neighbor) []bgp.ParameterCapabilityInt tuples = append(tuples, bgp.NewCapGracefulRestartTuple(k, restarting)) pConf.AfiSafis[i].MpGracefulRestart.State.Advertised = true } + if m := rf.LongLivedGracefulRestart.Config; m.Enabled { + ltuples = append(ltuples, bgp.NewCapLongLivedGracefulRestartTuple(k, restarting, m.RestartTime)) + } } } time := c.RestartTime notification := c.NotificationEnabled caps = append(caps, bgp.NewCapGracefulRestart(restarting, notification, time, tuples)) + if c.LongLivedEnabled { + caps = append(caps, bgp.NewCapLongLivedGracefulRestart(ltuples)) + } } return caps } @@ -695,7 +703,7 @@ func (h *FSMHandler) recvMessageWithError() (*FsmMsg, error) { "Subcode": body.ErrorSubcode, "Data": body.Data, }).Warn("received notification") - if body.ErrorCode == bgp.BGP_ERROR_CEASE && body.ErrorSubcode == bgp.BGP_ERROR_SUB_HARD_RESET { + if s := h.fsm.pConf.GracefulRestart.State; s.Enabled && 
s.NotificationEnabled && body.ErrorCode == bgp.BGP_ERROR_CEASE && body.ErrorSubcode == bgp.BGP_ERROR_SUB_HARD_RESET { sendToErrorCh(FSM_HARD_RESET) } else { sendToErrorCh(FsmStateReason(fmt.Sprintf("%s %s", FSM_NOTIFICATION_RECV, bgp.NewNotificationErrorCode(body.ErrorCode, body.ErrorSubcode).String()))) @@ -866,6 +874,22 @@ func (h *FSMHandler) opensent() (bgp.FSMState, FsmStateReason) { fsm.pConf.GracefulRestart.State.NotificationEnabled = true } } + llgr, ok2 := fsm.capMap[bgp.BGP_CAP_LONG_LIVED_GRACEFUL_RESTART] + if fsm.pConf.GracefulRestart.Config.LongLivedEnabled && ok && ok2 { + fsm.pConf.GracefulRestart.State.LongLivedEnabled = true + cap := llgr[len(llgr)-1].(*bgp.CapLongLivedGracefulRestart) + for _, t := range cap.Tuples { + n := bgp.AddressFamilyNameMap[bgp.AfiSafiToRouteFamily(t.AFI, t.SAFI)] + for i, a := range fsm.pConf.AfiSafis { + if string(a.Config.AfiSafiName) == n { + fsm.pConf.AfiSafis[i].LongLivedGracefulRestart.State.Enabled = true + fsm.pConf.AfiSafis[i].LongLivedGracefulRestart.State.Received = true + fsm.pConf.AfiSafis[i].LongLivedGracefulRestart.State.PeerRestartTime = t.RestartTime + break + } + } + } + } msg := bgp.NewBGPKeepAliveMessage() b, _ := msg.Serialize() @@ -1178,7 +1202,7 @@ func (h *FSMHandler) established() (bgp.FSMState, FsmStateReason) { case err := <-h.errorCh: h.conn.Close() h.t.Kill(nil) - if s := fsm.pConf.GracefulRestart.State; s.Enabled && ((s.NotificationEnabled && err != FSM_HARD_RESET) || err == FSM_READ_FAILED || err == FSM_WRITE_FAILED) { + if s := fsm.pConf.GracefulRestart.State; s.Enabled && ((s.NotificationEnabled && strings.HasPrefix(string(err), FSM_NOTIFICATION_RECV)) || err == FSM_READ_FAILED || err == FSM_WRITE_FAILED) { err = FSM_GRACEFUL_RESTART log.WithFields(log.Fields{ "Topic": "Peer", diff --git a/server/peer.go b/server/peer.go index 2feb4d86..e9d0fe3e 100644 --- a/server/peer.go +++ b/server/peer.go @@ -40,6 +40,7 @@ type Peer struct { policy *table.RoutingPolicy localRib *table.TableManager 
prefixLimitWarned map[bgp.RouteFamily]bool + llgrEndChs []chan struct{} } func NewPeer(g *config.Global, conf *config.Neighbor, loc *table.TableManager, policy *table.RoutingPolicy) *Peer { @@ -99,6 +100,25 @@ func (peer *Peer) configuredRFlist() []bgp.RouteFamily { return rfs } +func classifyFamilies(all, part []bgp.RouteFamily) ([]bgp.RouteFamily, []bgp.RouteFamily) { + a := []bgp.RouteFamily{} + b := []bgp.RouteFamily{} + for _, f := range all { + p := true + for _, g := range part { + if f == g { + p = false + a = append(a, f) + break + } + } + if p { + b = append(b, f) + } + } + return a, b +} + func (peer *Peer) forwardingPreservedFamilies() ([]bgp.RouteFamily, []bgp.RouteFamily) { list := []bgp.RouteFamily{} for _, a := range peer.fsm.pConf.AfiSafis { @@ -107,21 +127,83 @@ func (peer *Peer) forwardingPreservedFamilies() ([]bgp.RouteFamily, []bgp.RouteF list = append(list, f) } } - preserved := []bgp.RouteFamily{} - notPreserved := []bgp.RouteFamily{} - for _, f := range peer.configuredRFlist() { - p := true - for _, g := range list { - if f == g { - p = false - preserved = append(preserved, f) + return classifyFamilies(peer.configuredRFlist(), list) +} + +func (peer *Peer) llgrFamilies() ([]bgp.RouteFamily, []bgp.RouteFamily) { + list := []bgp.RouteFamily{} + for _, a := range peer.fsm.pConf.AfiSafis { + if a.LongLivedGracefulRestart.State.Enabled { + f, _ := bgp.GetRouteFamily(string(a.Config.AfiSafiName)) + list = append(list, f) + } + } + return classifyFamilies(peer.configuredRFlist(), list) +} + +func (peer *Peer) isLLGREnabledFamily(family bgp.RouteFamily) bool { + if !peer.fsm.pConf.GracefulRestart.Config.LongLivedEnabled { + return false + } + fs, _ := peer.llgrFamilies() + for _, f := range fs { + if f == family { + return true + } + } + return false +} + +func (peer *Peer) llgrRestartTime(family bgp.RouteFamily) uint32 { + for _, a := range peer.fsm.pConf.AfiSafis { + if f, _ := bgp.GetRouteFamily(string(a.Config.AfiSafiName)); f == family { + 
return a.LongLivedGracefulRestart.State.PeerRestartTime + } + } + return 0 +} + +func (peer *Peer) llgrRestartTimerExpired(family bgp.RouteFamily) bool { + all := true + for _, a := range peer.fsm.pConf.AfiSafis { + if f, _ := bgp.GetRouteFamily(string(a.Config.AfiSafiName)); f == family { + a.LongLivedGracefulRestart.State.PeerRestartTimerExpired = true + } + s := a.LongLivedGracefulRestart.State + if s.Received && !s.PeerRestartTimerExpired { + all = false + } + } + return all +} + +func (peer *Peer) markLLGRStale(fs []bgp.RouteFamily) []*table.Path { + paths := peer.adjRibIn.PathList(fs, true) + for i, p := range paths { + doStale := true + for _, c := range p.GetCommunities() { + if c == bgp.COMMUNITY_NO_LLGR { + doStale = false + p = p.Clone(true) + break } } - if p { - notPreserved = append(notPreserved, f) + if doStale { + p = p.Clone(false) + p.SetCommunities([]uint32{bgp.COMMUNITY_LLGR_STALE}, false) } + paths[i] = p } - return preserved, notPreserved + return paths +} + +func (peer *Peer) stopPeerRestarting() { + peer.fsm.pConf.GracefulRestart.State.PeerRestarting = false + for _, ch := range peer.llgrEndChs { + close(ch) + } + peer.llgrEndChs = make([]chan struct{}, 0) + } func (peer *Peer) getAccepted(rfList []bgp.RouteFamily) []*table.Path { @@ -159,6 +241,22 @@ func (peer *Peer) filterpath(path *table.Path, withdrawals []*table.Path) *table } path = peer.policy.ApplyPolicy(peer.TableID(), table.POLICY_DIRECTION_EXPORT, path, options) + // draft-uttaro-idr-bgp-persistence-02 + // 4.3. Processing LLGR_STALE Routes + // + // The route SHOULD NOT be advertised to any neighbor from which the + // Long-lived Graceful Restart Capability has not been received. The + // exception is described in the Optional Partial Deployment + // Procedure section (Section 4.7). Note that this requirement + // implies that such routes should be withdrawn from any such neighbor. 
+ if path != nil && !path.IsWithdraw && !peer.isLLGREnabledFamily(path.GetRouteFamily()) && path.IsLLGRStale() { + if peer.adjRibOut.Exists(path) { + path = path.Clone(true) + } else { + return nil + } + } + // remove local-pref attribute // we should do this after applying export policy since policy may // set local-preference diff --git a/server/server.go b/server/server.go index 1d36720b..f92b8be8 100644 --- a/server/server.go +++ b/server/server.go @@ -628,13 +628,72 @@ func (server *BgpServer) handleFSMMessage(peer *Peer, e *FsmMsg) { peer.DropAll(drop) server.dropPeerAllRoutes(peer, drop) } else if peer.fsm.pConf.GracefulRestart.State.PeerRestarting && nextState == bgp.BGP_FSM_IDLE { - // RFC 4724 4.2 - // If the session does not get re-established within the "Restart Time" - // that the peer advertised previously, the Receiving Speaker MUST - // delete all the stale routes from the peer that it is retaining. - peer.fsm.pConf.GracefulRestart.State.PeerRestarting = false - peer.DropAll(peer.configuredRFlist()) - server.dropPeerAllRoutes(peer, peer.configuredRFlist()) + if peer.fsm.pConf.GracefulRestart.State.LongLivedEnabled { + llgr, no_llgr := peer.llgrFamilies() + + peer.DropAll(no_llgr) + server.dropPeerAllRoutes(peer, no_llgr) + + // attach LLGR_STALE community to paths in peer's adj-rib-in + // paths with NO_LLGR are deleted + pathList := peer.markLLGRStale(llgr) + + // calculate again + // wheh path with LLGR_STALE chosen as best, + // peer which doesn't support LLGR will drop the path + // if it is in adj-rib-out, do withdrawal + server.propagateUpdate(peer, pathList) + + for _, f := range llgr { + endCh := make(chan struct{}) + peer.llgrEndChs = append(peer.llgrEndChs, endCh) + go func(family bgp.RouteFamily, endCh chan struct{}) { + t := peer.llgrRestartTime(family) + timer := time.NewTimer(time.Second * time.Duration(t)) + + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": peer.ID(), + "Family": family, + }).Debugf("start LLGR restart timer 
(%d sec) for %s", t, family) + + select { + case <-timer.C: + ch := make(chan struct{}) + defer func() { <-ch }() + server.mgmtCh <- func() { + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": peer.ID(), + "Family": family, + }).Debugf("LLGR restart timer (%d sec) for %s expired", t, family) + defer close(ch) + peer.DropAll([]bgp.RouteFamily{family}) + server.dropPeerAllRoutes(peer, []bgp.RouteFamily{family}) + + // when all llgr restart timer expired, stop PeerRestarting + if peer.llgrRestartTimerExpired(family) { + peer.stopPeerRestarting() + } + } + case <-endCh: + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": peer.ID(), + "Family": family, + }).Debugf("stop LLGR restart timer (%d sec) for %s", t, family) + } + }(f, endCh) + } + } else { + // RFC 4724 4.2 + // If the session does not get re-established within the "Restart Time" + // that the peer advertised previously, the Receiving Speaker MUST + // delete all the stale routes from the peer that it is retaining. + peer.fsm.pConf.GracefulRestart.State.PeerRestarting = false + peer.DropAll(peer.configuredRFlist()) + server.dropPeerAllRoutes(peer, peer.configuredRFlist()) + } } peer.outgoing.Close() @@ -651,8 +710,7 @@ func (server *BgpServer) handleFSMMessage(peer *Peer, e *FsmMsg) { server.mgmtCh <- func() { defer close(ch) - - server.softResetOut(peer.fsm.pConf.Config.NeighborAddress, bgp.RouteFamily(0), true) + server.softResetOut(peer.fsm.pConf.Config.NeighborAddress, family, true) } } } @@ -832,7 +890,7 @@ func (server *BgpServer) handleFSMMessage(peer *Peer, e *FsmMsg) { } if peer.fsm.pConf.GracefulRestart.State.PeerRestarting { if peer.recvedAllEOR() { - peer.fsm.pConf.GracefulRestart.State.PeerRestarting = false + peer.stopPeerRestarting() pathList := peer.adjRibIn.DropStale(peer.configuredRFlist()) log.WithFields(log.Fields{ "Topic": "Peer", @@ -1613,6 +1671,7 @@ func (server *BgpServer) deleteNeighbor(c *config.Neighbor, code, subcode uint8) }).Infof("Delete a peer configuration for:%s", 
addr) n.fsm.sendNotification(code, subcode, nil, "") + n.stopPeerRestarting() go func(addr string) { t1 := time.AfterFunc(time.Minute*5, func() { diff --git a/table/destination.go b/table/destination.go index 58991a35..50b946e9 100644 --- a/table/destination.go +++ b/table/destination.go @@ -47,6 +47,7 @@ const ( BPR_IGP_COST BestPathReason = "IGP Cost" BPR_ROUTER_ID BestPathReason = "Router ID" BPR_OLDER BestPathReason = "Older" + BPR_NON_LLGR_STALE BestPathReason = "no LLGR Stale" ) func IpToRadixkey(b []byte, max uint8) string { @@ -479,6 +480,11 @@ func (p paths) Less(i, j int) bool { var better *Path reason := BPR_UNKNOWN + // draft-uttaro-idr-bgp-persistence-02 + if better == nil { + better = compareByLLGRStaleCommunity(path1, path2) + reason = BPR_NON_LLGR_STALE + } // Follow best path calculation algorithm steps. // compare by reachability if better == nil { @@ -545,6 +551,17 @@ func (p paths) Less(i, j int) bool { return false } +func compareByLLGRStaleCommunity(path1, path2 *Path) *Path { + p1 := path1.IsLLGRStale() + p2 := path2.IsLLGRStale() + if p1 == p2 { + return nil + } else if p1 { + return path2 + } + return path1 +} + func compareByReachableNexthop(path1, path2 *Path) *Path { // Compares given paths and selects best path based on reachable next-hop. 
// diff --git a/table/path.go b/table/path.go index b0f21e04..f8058730 100644 --- a/table/path.go +++ b/table/path.go @@ -348,6 +348,15 @@ func (path *Path) IsStale() bool { return path.OriginInfo().stale } +func (path *Path) IsLLGRStale() bool { + for _, c := range path.GetCommunities() { + if c == bgp.COMMUNITY_LLGR_STALE { + return true + } + } + return false +} + func (path *Path) GetSourceAs() uint32 { attr := path.getPathAttr(bgp.BGP_ATTR_TYPE_AS_PATH) if attr != nil { diff --git a/test/lib/base.py b/test/lib/base.py index 5ccf952e..3d14cf4c 100644 --- a/test/lib/base.py +++ b/test/lib/base.py @@ -280,7 +280,7 @@ class BGPContainer(Container): is_rr_client=False, cluster_id=None, flowspec=False, bridge='', reload_config=True, as2=False, graceful_restart=None, local_as=None, prefix_limit=None, - v6=False): + v6=False, llgr=None): neigh_addr = '' local_addr = '' it = itertools.product(self.ip_addrs, peer.ip_addrs) @@ -317,7 +317,8 @@ class BGPContainer(Container): 'as2': as2, 'graceful_restart': graceful_restart, 'local_as': local_as, - 'prefix_limit': prefix_limit} + 'prefix_limit': prefix_limit, + 'llgr': llgr} if self.is_running and reload_config: self.create_config() self.reload_config() diff --git a/test/lib/gobgp.py b/test/lib/gobgp.py index b8e669dd..77060893 100644 --- a/test/lib/gobgp.py +++ b/test/lib/gobgp.py @@ -306,6 +306,12 @@ class GoBGPContainer(BGPContainer): for afi_safi in afi_safi_list: afi_safi['mp-graceful-restart'] = {'config': {'enabled': True}} + if info['llgr'] is not None: + n['graceful-restart']['config']['restart-time'] = 1 + n['graceful-restart']['config']['long-lived-enabled'] = True + for afi_safi in afi_safi_list: + afi_safi['long-lived-graceful-restart'] = {'config': {'enabled': True, 'restart-time': 30}} + if info['is_rr_client']: clusterId = self.router_id if 'cluster_id' in info and info['cluster_id'] is not None: diff --git a/test/scenario_test/long_lived_graceful_restart_test.py 
b/test/scenario_test/long_lived_graceful_restart_test.py new file mode 100644 index 00000000..e0c51586 --- /dev/null +++ b/test/scenario_test/long_lived_graceful_restart_test.py @@ -0,0 +1,170 @@ +# Copyright (C) 2016 Nippon Telegraph and Telephone Corporation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from fabric.api import local +from lib import base +from lib.gobgp import * +from lib.quagga import * +import sys +import os +import time +import nose +from noseplugin import OptionParser, parser_option +from itertools import chain + + +class GoBGPTestBase(unittest.TestCase): + + @classmethod + def setUpClass(cls): + gobgp_ctn_image_name = parser_option.gobgp_image + base.TEST_PREFIX = parser_option.test_prefix + + g1 = GoBGPContainer(name='g1', asn=65000, router_id='192.168.0.1', + ctn_image_name=gobgp_ctn_image_name, + log_level=parser_option.gobgp_log_level) + g2 = GoBGPContainer(name='g2', asn=65001, router_id='192.168.0.2', + ctn_image_name=gobgp_ctn_image_name, + log_level=parser_option.gobgp_log_level) + g3 = GoBGPContainer(name='g3', asn=65002, router_id='192.168.0.3', + ctn_image_name=gobgp_ctn_image_name, + log_level=parser_option.gobgp_log_level) + g4 = GoBGPContainer(name='g4', asn=65003, router_id='192.168.0.4', + ctn_image_name=gobgp_ctn_image_name, + log_level=parser_option.gobgp_log_level) + ctns = [g1, g2, g3, g4] + + initial_wait_time = max(ctn.run() for ctn in ctns) + + time.sleep(initial_wait_time) + + g1.add_peer(g2, 
graceful_restart=True, llgr=True) + g2.add_peer(g1, graceful_restart=True, llgr=True) + g1.add_peer(g3, graceful_restart=True, llgr=True) + g3.add_peer(g1, graceful_restart=True, llgr=True) + g1.add_peer(g4, graceful_restart=True) + g4.add_peer(g1, graceful_restart=True) + + cls.bgpds = {'g1': g1, 'g2': g2, 'g3': g3, 'g4': g4} + + # test each neighbor state is turned establish + def test_01_neighbor_established(self): + g1 = self.bgpds['g1'] + g2 = self.bgpds['g2'] + g3 = self.bgpds['g3'] + g4 = self.bgpds['g4'] + g1.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g2) + g1.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g3) + g1.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g4) + + def test_02_graceful_restart(self): + g1 = self.bgpds['g1'] + g2 = self.bgpds['g2'] + g3 = self.bgpds['g3'] + g4 = self.bgpds['g4'] + + g2.local('gobgp global rib add 10.0.0.0/24') + g2.local('gobgp global rib add 10.10.0.0/24 community no-llgr') + + time.sleep(1) + + g2.graceful_restart() + g1.wait_for(expected_state=BGP_FSM_ACTIVE, peer=g2) + + time.sleep(1) + + self.assertTrue(len(g1.get_global_rib('10.0.0.0/24')) == 1) + # 10.10.0.0/24 is announced with no-llgr community + # must not exist in the rib + self.assertTrue(len(g1.get_global_rib('10.10.0.0/24')) == 0) + for d in g1.get_global_rib(): + for p in d['paths']: + self.assertTrue(p['stale']) + + self.assertTrue(len(g3.get_global_rib('10.0.0.0/24')) == 1) + # check llgr-stale community is added to 10.0.0.0/24 + r = g3.get_global_rib('10.0.0.0/24')[0]['paths'][0] + comms = list(chain.from_iterable([ attr['communities'] for attr in r['attrs'] if attr['type'] == 8])) + self.assertTrue(0xffff0006 in comms) + # g4 is not llgr capable, llgr-stale route must be + # withdrawn + self.assertTrue(len(g4.get_global_rib('10.0.0.0/24')) == 0) + + g2._start_gobgp(graceful_restart=True) + time.sleep(2) + g2.local('gobgp global rib add 10.0.0.0/24') + g2.local('gobgp global rib add 10.10.0.0/24') + + def test_03_neighbor_established(self): 
+ g1 = self.bgpds['g1'] + g2 = self.bgpds['g2'] + g3 = self.bgpds['g3'] + g4 = self.bgpds['g4'] + g1.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g2) + time.sleep(1) + self.assertTrue(len(g1.get_global_rib('10.0.0.0/24')) == 1) + self.assertTrue(len(g1.get_global_rib('10.10.0.0/24')) == 1) + for d in g1.get_global_rib(): + for p in d['paths']: + self.assertFalse(p.get('stale', False)) + + def test_04_llgr_stale_route_depreferenced(self): + g1 = self.bgpds['g1'] + g2 = self.bgpds['g2'] + g3 = self.bgpds['g3'] + g4 = self.bgpds['g4'] + g4.local('gobgp global rib add 10.0.0.0/24 med 100') + time.sleep(1) + # check g2's path is chosen as best and advertised + rib = g3.get_global_rib('10.0.0.0/24') + self.assertTrue(len(rib) == 1) + self.assertTrue(g2.asn in rib[0]['paths'][0]['aspath']) + + g2.graceful_restart() + g1.wait_for(expected_state=BGP_FSM_ACTIVE, peer=g2) + + time.sleep(1) + + # llgr_stale route depreference must happend + # check g4's path is chosen as best and advertised + rib = g3.get_global_rib('10.0.0.0/24') + self.assertTrue(len(rib) == 1) + self.assertTrue(g4.asn in rib[0]['paths'][0]['aspath']) + + # if no candidate exists, llgr_stale route will be chosen as best + rib = g3.get_global_rib('10.10.0.0/24') + self.assertTrue(len(rib) == 1) + self.assertTrue(g2.asn in rib[0]['paths'][0]['aspath']) + + + def test_05_llgr_restart_timer_expire(self): + time.sleep(35) + g3 = self.bgpds['g3'] + rib = g3.get_global_rib('10.10.0.0/24') + self.assertTrue(len(rib) == 0) + + +if __name__ == '__main__': + if os.geteuid() is not 0: + print "you are not root." 
+ sys.exit(1) + output = local("which docker 2>&1 > /dev/null ; echo $?", capture=True) + if int(output) is not 0: + print "docker not found" + sys.exit(1) + + nose.main(argv=sys.argv, addplugins=[OptionParser()], + defaultTest=sys.argv[0]) diff --git a/tools/pyang_plugins/gobgp.yang b/tools/pyang_plugins/gobgp.yang index 72c766ab..469ecc48 100644 --- a/tools/pyang_plugins/gobgp.yang +++ b/tools/pyang_plugins/gobgp.yang @@ -585,6 +585,36 @@ module gobgp { } } + grouping long-lived-graceful-restart { + container long-lived-graceful-restart { + container config { + leaf enabled { + type boolean; + } + leaf restart-time { + type uint32; + } + } + container state { + leaf enabled { + type boolean; + } + leaf received { + type boolean; + } + leaf advertised { + type boolean; + } + leaf peer-restart-time { + type uint32; + } + leaf peer-restart-timer-expired { + type boolean; + } + } + } + } + // augment statements augment "/bgp:bgp/bgp:neighbors/bgp:neighbor/bgp:state/bgp:messages/bgp:sent" { description "additional counters"; @@ -708,6 +738,9 @@ module gobgp { leaf notification-enabled { type boolean; } + leaf long-lived-enabled { + type boolean; + } } augment "/bgp:bgp/bgp:neighbors/bgp:neighbor/bgp:graceful-restart/bgp:state" { @@ -718,6 +751,9 @@ module gobgp { leaf notification-enabled { type boolean; } + leaf long-lived-enabled { + type boolean; + } } augment "/bgp:bgp/bgp:peer-groups/bgp:peer-group" { @@ -1013,5 +1049,7 @@ module gobgp { uses route-target-membership-config; } } + + uses long-lived-graceful-restart; } } |