diff options
-rw-r--r-- | pkg/server/fsm.go | 33 | ||||
-rw-r--r-- | pkg/server/fsm_test.go | 46 | ||||
-rw-r--r-- | pkg/server/server.go | 6 | ||||
-rw-r--r-- | test/scenario_test/graceful_restart_test.py | 43 | ||||
-rw-r--r-- | test/scenario_test/long_lived_graceful_restart_test.py | 71 |
5 files changed, 169 insertions, 30 deletions
diff --git a/pkg/server/fsm.go b/pkg/server/fsm.go index 39618aa5..b037c0f5 100644 --- a/pkg/server/fsm.go +++ b/pkg/server/fsm.go @@ -1788,17 +1788,20 @@ func (h *fsmHandler) established(ctx context.Context) (bgp.FSMState, *fsmStateRe // it now. h.outgoing.In() <- err fsm.lock.RLock() - if s := fsm.pConf.GracefulRestart.State; s.Enabled && - (s.NotificationEnabled && err.Type == fsmNotificationRecv || + if s := fsm.pConf.GracefulRestart.State; s.Enabled { + if (s.NotificationEnabled && err.Type == fsmNotificationRecv) || + (err.Type == fsmNotificationSent && + err.BGPNotification.Body.(*bgp.BGPNotification).ErrorCode == bgp.BGP_ERROR_HOLD_TIMER_EXPIRED) || err.Type == fsmReadFailed || - err.Type == fsmWriteFailed) { - err = *newfsmStateReason(fsmGracefulRestart, nil, nil) - log.WithFields(log.Fields{ - "Topic": "Peer", - "Key": fsm.pConf.State.NeighborAddress, - "State": fsm.state.String(), - }).Info("peer graceful restart") - fsm.gracefulRestartTimer.Reset(time.Duration(fsm.pConf.GracefulRestart.State.PeerRestartTime) * time.Second) + err.Type == fsmWriteFailed { + err = *newfsmStateReason(fsmGracefulRestart, nil, nil) + log.WithFields(log.Fields{ + "Topic": "Peer", + "Key": fsm.pConf.State.NeighborAddress, + "State": fsm.state.String(), + }).Info("peer graceful restart") + fsm.gracefulRestartTimer.Reset(time.Duration(fsm.pConf.GracefulRestart.State.PeerRestartTime) * time.Second) + } } fsm.lock.RUnlock() return bgp.BGP_FSM_IDLE, &err @@ -1812,7 +1815,15 @@ func (h *fsmHandler) established(ctx context.Context) (bgp.FSMState, *fsmStateRe fsm.lock.RUnlock() m := bgp.NewBGPNotificationMessage(bgp.BGP_ERROR_HOLD_TIMER_EXPIRED, 0, nil) h.outgoing.In() <- &fsmOutgoingMsg{Notification: m} - return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmHoldTimerExpired, m, nil) + fsm.lock.RLock() + s := fsm.pConf.GracefulRestart.State + fsm.lock.RUnlock() + // Do not return hold timer expired to server if graceful restart is enabled + // Let it fallback to read/write error or fsmNotificationSent handled above + // Reference: https://github.com/osrg/gobgp/issues/2174 + if !s.Enabled { + return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmHoldTimerExpired, m, nil) + } case <-h.holdTimerResetCh: fsm.lock.RLock() if fsm.pConf.Timers.State.NegotiatedHoldTime != 0 { diff --git a/pkg/server/fsm_test.go b/pkg/server/fsm_test.go index 9351c71d..240f5185 100644 --- a/pkg/server/fsm_test.go +++ b/pkg/server/fsm_test.go @@ -244,9 +244,53 @@ func TestFSMHandlerEstablish_HoldTimerExpired(t *testing.T) { p.fsm.pConf.Timers.State.NegotiatedHoldTime = 2 go pushPackets() - state, _ := h.established(context.Background()) + state, fsmStateReason := h.established(context.Background()) time.Sleep(time.Second * 1) assert.Equal(bgp.BGP_FSM_IDLE, state) + assert.Equal(fsmHoldTimerExpired, fsmStateReason.Type) + m.mtx.Lock() + lastMsg := m.sendBuf[len(m.sendBuf)-1] + m.mtx.Unlock() + sent, _ := bgp.ParseBGPMessage(lastMsg) + assert.Equal(uint8(bgp.BGP_MSG_NOTIFICATION), sent.Header.Type) + assert.Equal(uint8(bgp.BGP_ERROR_HOLD_TIMER_EXPIRED), sent.Body.(*bgp.BGPNotification).ErrorCode) +} + +func TestFSMHandlerEstablish_HoldTimerExpired_GR_Enabled(t *testing.T) { + assert := assert.New(t) + m := NewMockConnection(t) + + p, h := makePeerAndHandler() + + // push mock connection + p.fsm.conn = m + p.fsm.h = h + + // set keepalive ticker + p.fsm.pConf.Timers.State.NegotiatedHoldTime = 3 + + msg := keepalive() + header, _ := msg.Header.Serialize() + body, _ := msg.Body.Serialize() + + pushPackets := func() { + // first keepalive from peer + m.setData(header) + m.setData(body) + } + + // set holdtime + p.fsm.pConf.Timers.Config.HoldTime = 2 + p.fsm.pConf.Timers.State.NegotiatedHoldTime = 2 + + // Enable graceful restart + p.fsm.pConf.GracefulRestart.State.Enabled = true + + go pushPackets() + state, fsmStateReason := h.established(context.Background()) + time.Sleep(time.Second * 1) + assert.Equal(bgp.BGP_FSM_IDLE, state) + assert.Equal(fsmGracefulRestart, fsmStateReason.Type) m.mtx.Lock() lastMsg := m.sendBuf[len(m.sendBuf)-1] m.mtx.Unlock() diff --git a/pkg/server/server.go b/pkg/server/server.go index 4fbb76ff..fa0f5225 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -1380,7 +1380,7 @@ func (s *BgpServer) handleFSMMessage(peer *peer, e *fsmMsg) { "Topic": "Peer", "Key": peer.ID(), "Family": family, - }).Debugf("start LLGR restart timer (%d sec) for %s", t, family) + }).Infof("start LLGR restart timer (%d sec) for %s", t, family) select { case <-timer.C: @@ -1389,7 +1389,7 @@ func (s *BgpServer) handleFSMMessage(peer *peer, e *fsmMsg) { "Topic": "Peer", "Key": peer.ID(), "Family": family, - }).Debugf("LLGR restart timer (%d sec) for %s expired", t, family) + }).Infof("LLGR restart timer (%d sec) for %s expired", t, family) s.propagateUpdate(peer, peer.DropAll([]bgp.RouteFamily{family})) // when all llgr restart timer expired, stop PeerRestarting @@ -1403,7 +1403,7 @@ func (s *BgpServer) handleFSMMessage(peer *peer, e *fsmMsg) { "Topic": "Peer", "Key": peer.ID(), "Family": family, - }).Debugf("stop LLGR restart timer (%d sec) for %s", t, family) + }).Infof("stop LLGR restart timer (%d sec) for %s", t, family) } }(f, endCh) } diff --git a/test/scenario_test/graceful_restart_test.py b/test/scenario_test/graceful_restart_test.py index 59dfe226..18c5f55f 100644 --- a/test/scenario_test/graceful_restart_test.py +++ b/test/scenario_test/graceful_restart_test.py @@ -131,11 +131,12 @@ class GoBGPTestBase(unittest.TestCase): time.sleep(1) self.assertEqual(len(g3.get_global_rib('10.10.20.0/24')), 1) - def test_05_graceful_restart(self): + def test_05_holdtime_expiry_graceful_restart(self): g1 = self.bgpds['g1'] g2 = self.bgpds['g2'] g3 = self.bgpds['g3'] - g1.stop_gobgp() + g1.local("ip route add blackhole {}/32".format(g2.ip_addrs[0][1].split("/")[0])) + g1.local("ip route add blackhole {}/32".format(g3.ip_addrs[0][1].split("/")[0])) g2.wait_for(expected_state=BGP_FSM_ACTIVE, peer=g1) self.assertEqual(len(g2.get_global_rib('10.10.20.0/24')), 1) self.assertEqual(len(g2.get_global_rib('10.10.30.0/24')), 1) @@ -147,11 +148,45 @@ class GoBGPTestBase(unittest.TestCase): self.assertEqual(len(g3.get_global_rib('10.10.30.0/24')), 1) def test_06_test_restart_timer_expire(self): + g2 = self.bgpds['g2'] + time.sleep(GRACEFUL_RESTART_TIME + 5) + g2 = self.bgpds['g2'] + self.assertEqual(len(g2.get_global_rib()), 0) + + def test_07_establish_after_graceful_restart(self): + g1 = self.bgpds['g1'] + g2 = self.bgpds['g2'] + g3 = self.bgpds['g3'] + g1.local("ip route del blackhole {}/32".format(g2.ip_addrs[0][1].split("/")[0])) + g1.local("ip route del blackhole {}/32".format(g3.ip_addrs[0][1].split("/")[0])) + g1.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g2) + g1.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g3) + self.assertEqual(len(g2.get_global_rib('10.10.20.0/24')), 1) + self.assertEqual(len(g2.get_global_rib('10.10.30.0/24')), 1) + self.assertEqual(len(g3.get_global_rib('10.10.20.0/24')), 1) + self.assertEqual(len(g3.get_global_rib('10.10.30.0/24')), 1) + + def test_08_graceful_restart(self): + g1 = self.bgpds['g1'] + g2 = self.bgpds['g2'] + g3 = self.bgpds['g3'] + g1.stop_gobgp() + g2.wait_for(expected_state=BGP_FSM_ACTIVE, peer=g1) + self.assertEqual(len(g2.get_global_rib('10.10.20.0/24')), 1) + self.assertEqual(len(g2.get_global_rib('10.10.30.0/24')), 1) + for d in g2.get_global_rib(): + for p in d['paths']: + self.assertTrue(p['stale']) + + self.assertEqual(len(g3.get_global_rib('10.10.20.0/24')), 0) + self.assertEqual(len(g3.get_global_rib('10.10.30.0/24')), 1) + + def test_09_test_restart_timer_expire(self): time.sleep(GRACEFUL_RESTART_TIME + 5) g2 = self.bgpds['g2'] self.assertEqual(len(g2.get_global_rib()), 0) - def test_07_multineighbor_established(self): + def test_10_multineighbor_established(self): g1 = self.bgpds['g1'] g2 = self.bgpds['g2'] g3 = self.bgpds['g3'] @@ -170,7 +205,7 @@ class GoBGPTestBase(unittest.TestCase): g2.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g1) g3.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g1) - def test_08_multineighbor_graceful_restart(self): + def test_11_multineighbor_graceful_restart(self): g1 = self.bgpds['g1'] g2 = self.bgpds['g2'] g3 = self.bgpds['g3'] diff --git a/test/scenario_test/long_lived_graceful_restart_test.py b/test/scenario_test/long_lived_graceful_restart_test.py index 1fb5edac..4c185894 100644 --- a/test/scenario_test/long_lived_graceful_restart_test.py +++ b/test/scenario_test/long_lived_graceful_restart_test.py @@ -78,18 +78,12 @@ class GoBGPTestBase(unittest.TestCase): g1.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g3) g1.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g4) - def test_02_graceful_restart(self): + def _test_graceful_restart(self): g1 = self.bgpds['g1'] g2 = self.bgpds['g2'] g3 = self.bgpds['g3'] g4 = self.bgpds['g4'] - g2.local('gobgp global rib add 10.0.0.0/24') - g2.local('gobgp global rib add 10.10.0.0/24 community no-llgr') - - time.sleep(1) - - g2.stop_gobgp() g1.wait_for(expected_state=BGP_FSM_ACTIVE, peer=g2) time.sleep(1) @@ -117,7 +111,62 @@ class GoBGPTestBase(unittest.TestCase): # withdrawn self.assertEqual(len(g4.get_global_rib('10.0.0.0/24')), 0) - def test_03_softreset_preserves_llgr_community(self): + def test_02_hold_timer_expiry_graceful_restart(self): + g1 = self.bgpds['g1'] + g2 = self.bgpds['g2'] + g3 = self.bgpds['g3'] + g4 = self.bgpds['g4'] + + g2.local('gobgp global rib add 10.0.0.0/24') + g2.local('gobgp global rib add 10.10.0.0/24 community no-llgr') + + time.sleep(1) + + g2.local("ip route add blackhole {}/32".format(g1.ip_addrs[0][1].split("/")[0])) + g2.local("ip route add blackhole {}/32".format(g3.ip_addrs[0][1].split("/")[0])) + g2.local("ip route add blackhole {}/32".format(g4.ip_addrs[0][1].split("/")[0])) + + self._test_graceful_restart() + + def test_03_neighbor_established_after_hold_time_expiry(self): + g1 = self.bgpds['g1'] + g2 = self.bgpds['g2'] + g3 = self.bgpds['g3'] + g4 = self.bgpds['g4'] + g2.local("ip route del blackhole {}/32".format(g1.ip_addrs[0][1].split("/")[0])) + g2.local("ip route del blackhole {}/32".format(g3.ip_addrs[0][1].split("/")[0])) + g2.local("ip route del blackhole {}/32".format(g4.ip_addrs[0][1].split("/")[0])) + + g1.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g2) + time.sleep(1) + self.assertEqual(len(g1.get_global_rib('10.0.0.0/24')), 1) + self.assertEqual(len(g1.get_global_rib('10.10.0.0/24')), 1) + # check llgr-stale community is not present in 10.0.0.0/24 + r = g1.get_global_rib('10.0.0.0/24')[0]['paths'][0] + comms = list(chain.from_iterable([attr['communities'] for attr in r['attrs'] if attr['type'] == 8])) + self.assertFalse(0xffff0006 in comms) + + # check llgr-stale community is not present in 10.0.0.0/24 + self.assertEqual(len(g3.get_global_rib('10.0.0.0/24')), 1) + r = g3.get_global_rib('10.0.0.0/24')[0]['paths'][0] + comms = list(chain.from_iterable([attr['communities'] for attr in r['attrs'] if attr['type'] == 8])) + self.assertFalse(0xffff0006 in comms) + + def test_04_graceful_restart(self): + g1 = self.bgpds['g1'] + g2 = self.bgpds['g2'] + g3 = self.bgpds['g3'] + g4 = self.bgpds['g4'] + + g2.local('gobgp global rib add 10.0.0.0/24') + g2.local('gobgp global rib add 10.10.0.0/24 community no-llgr') + + time.sleep(1) + + g2.stop_gobgp() + self._test_graceful_restart() + + def test_05_softreset_preserves_llgr_community(self): g1 = self.bgpds['g1'] g2 = self.bgpds['g2'] g3 = self.bgpds['g3'] @@ -147,7 +196,7 @@ class GoBGPTestBase(unittest.TestCase): # g4 is not llgr capable, llgr-stale route must not be advertized on softreset g1 self.assertEqual(len(g4.get_global_rib('10.0.0.0/24')), 0) - def test_04_neighbor_established(self): + def test_06_neighbor_established(self): g1 = self.bgpds['g1'] g2 = self.bgpds['g2'] # g3 = self.bgpds['g3'] @@ -165,7 +214,7 @@ class GoBGPTestBase(unittest.TestCase): for p in d['paths']: self.assertFalse(p.get('stale', False)) - def test_05_llgr_stale_route_depreferenced(self): + def test_07_llgr_stale_route_depreferenced(self): g1 = self.bgpds['g1'] g2 = self.bgpds['g2'] g3 = self.bgpds['g3'] @@ -193,7 +242,7 @@ class GoBGPTestBase(unittest.TestCase): self.assertEqual(len(rib), 1) self.assertTrue(g2.asn in rib[0]['paths'][0]['aspath']) - def test_06_llgr_restart_timer_expire(self): + def test_08_llgr_restart_timer_expire(self): time.sleep(LONG_LIVED_GRACEFUL_RESTART_TIME + 5) g3 = self.bgpds['g3'] rib = g3.get_global_rib('10.10.0.0/24') |