summaryrefslogtreecommitdiffhomepage
path: root/server/fsm.go
diff options
context:
space:
mode:
authorISHIDA Wataru <ishida.wataru@lab.ntt.co.jp>2016-05-20 05:58:38 +0000
committerISHIDA Wataru <ishida.wataru@lab.ntt.co.jp>2016-05-20 06:10:34 +0000
commitd22476cb3175ab02730ce1db7d8c9b515460e5a7 (patch)
tree7927d40e9f68f61e5dd6d10ea97cd97258bb4e08 /server/fsm.go
parentc2a0e730ec4fa7354388a178b9e54ad94b0c22ba (diff)
server: fix connectLoop() select loop not to block
connect() blocks MIN_CONNECT_RETRY-1 (= 9sec) at most. If a passive connection comes, establish BGP sessions, then BGP sesions goes down right after that (this can happen when the peer sent invalid update messages etc..) while connect() is blocking, FSM.StateChange(), which writes to fsm.getActiveCh can also block. This leads to block the main goroutine in server.go. This commit fix the issue by invoking a goroutine for connect() each time. Signed-off-by: ISHIDA Wataru <ishida.wataru@lab.ntt.co.jp>
Diffstat (limited to 'server/fsm.go')
-rw-r--r--server/fsm.go104
1 files changed, 51 insertions, 53 deletions
diff --git a/server/fsm.go b/server/fsm.go
index 0c3c2efb..c369b432 100644
--- a/server/fsm.go
+++ b/server/fsm.go
@@ -273,73 +273,73 @@ func (fsm *FSM) sendNotification(code, subType uint8, data []byte, msg string) e
}
func (fsm *FSM) connectLoop() error {
- var tick int
- if tick = int(fsm.pConf.Timers.Config.ConnectRetry); tick < MIN_CONNECT_RETRY {
+ tick := int(fsm.pConf.Timers.Config.ConnectRetry)
+ if tick < MIN_CONNECT_RETRY {
tick = MIN_CONNECT_RETRY
}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
- ticker := time.NewTicker(time.Duration(tick) * time.Second)
- ticker.Stop()
-
timer := time.NewTimer(time.Duration(tick) * time.Second)
timer.Stop()
connect := func() {
- if fsm.state == bgp.BGP_FSM_ACTIVE && !fsm.pConf.GracefulRestart.State.PeerRestarting {
- addr := fsm.pConf.Config.NeighborAddress
- port := int(bgp.BGP_PORT)
- if fsm.pConf.Transport.Config.RemotePort != 0 {
- port = int(fsm.pConf.Transport.Config.RemotePort)
- }
- host := net.JoinHostPort(addr, strconv.Itoa(port))
- // check if LocalAddress has been configured
- laddr := fsm.pConf.Transport.Config.LocalAddress
- var conn net.Conn
- var err error
- if laddr != "" {
- if fsm.pConf.Config.AuthPassword != "" {
- deadline := (MIN_CONNECT_RETRY - 1) * 1000 // msec
- conn, err = DialTCPTimeoutWithMD5Sig(addr, port, laddr, fsm.pConf.Config.AuthPassword, deadline)
- } else {
- lhost := net.JoinHostPort(laddr, "0")
- ltcpaddr, e := net.ResolveTCPAddr("tcp", lhost)
- if e != nil {
- log.WithFields(log.Fields{
- "Topic": "Peer",
- "Key": fsm.pConf.Config.NeighborAddress,
- }).Warnf("failed to resolve ltcpaddr: %s", e)
- return
- }
- d := net.Dialer{LocalAddr: ltcpaddr, Timeout: time.Duration(MIN_CONNECT_RETRY-1) * time.Second}
- conn, err = d.Dial("tcp", host)
- }
+ addr := fsm.pConf.Config.NeighborAddress
+ port := int(bgp.BGP_PORT)
+ if fsm.pConf.Transport.Config.RemotePort != 0 {
+ port = int(fsm.pConf.Transport.Config.RemotePort)
+ }
+ host := net.JoinHostPort(addr, strconv.Itoa(port))
+ // check if LocalAddress has been configured
+ laddr := fsm.pConf.Transport.Config.LocalAddress
+ var conn net.Conn
+ var err error
+ if laddr != "" {
+ if fsm.pConf.Config.AuthPassword != "" {
+ deadline := (MIN_CONNECT_RETRY - 1) * 1000 // msec
+ conn, err = DialTCPTimeoutWithMD5Sig(addr, port, laddr, fsm.pConf.Config.AuthPassword, deadline)
} else {
- if fsm.pConf.Config.AuthPassword != "" {
- deadline := (MIN_CONNECT_RETRY - 1) * 1000 // msec
- conn, err = DialTCPTimeoutWithMD5Sig(addr, port, "0.0.0.0", fsm.pConf.Config.AuthPassword, deadline)
- } else {
- conn, err = net.DialTimeout("tcp", host, time.Duration(MIN_CONNECT_RETRY-1)*time.Second)
- }
- }
-
- if err == nil {
- select {
- case fsm.connCh <- conn:
- default:
- conn.Close()
+ lhost := net.JoinHostPort(laddr, "0")
+ ltcpaddr, e := net.ResolveTCPAddr("tcp", lhost)
+ if e != nil {
log.WithFields(log.Fields{
"Topic": "Peer",
"Key": fsm.pConf.Config.NeighborAddress,
- }).Warn("active conn is closed to avoid being blocked")
+ }).Warnf("failed to resolve ltcpaddr: %s", e)
+ return
}
+ d := net.Dialer{LocalAddr: ltcpaddr, Timeout: time.Duration(MIN_CONNECT_RETRY-1) * time.Second}
+ conn, err = d.Dial("tcp", host)
+ }
+ } else {
+ if fsm.pConf.Config.AuthPassword != "" {
+ deadline := (MIN_CONNECT_RETRY - 1) * 1000 // msec
+ conn, err = DialTCPTimeoutWithMD5Sig(addr, port, "0.0.0.0", fsm.pConf.Config.AuthPassword, deadline)
} else {
+ conn, err = net.DialTimeout("tcp", host, time.Duration(MIN_CONNECT_RETRY-1)*time.Second)
+ }
+ }
+
+ if err == nil {
+ select {
+ case fsm.connCh <- conn:
+ return
+ default:
+ conn.Close()
log.WithFields(log.Fields{
"Topic": "Peer",
"Key": fsm.pConf.Config.NeighborAddress,
- }).Debugf("failed to connect: %s", err)
+ }).Warn("active conn is closed to avoid being blocked")
}
+ } else {
+ log.WithFields(log.Fields{
+ "Topic": "Peer",
+ "Key": fsm.pConf.Config.NeighborAddress,
+ }).Debugf("failed to connect: %s", err)
+ }
+
+ if fsm.state == bgp.BGP_FSM_ACTIVE && !fsm.pConf.GracefulRestart.State.PeerRestarting {
+ timer.Reset(time.Duration(tick) * time.Second)
}
}
@@ -350,13 +350,11 @@ func (fsm *FSM) connectLoop() error {
"Topic": "Peer",
"Key": fsm.pConf.Config.NeighborAddress,
}).Debug("stop connect loop")
- ticker.Stop()
return nil
case <-timer.C:
- ticker = time.NewTicker(time.Duration(tick) * time.Second)
- connect()
- case <-ticker.C:
- connect()
+ if fsm.state == bgp.BGP_FSM_ACTIVE && !fsm.pConf.GracefulRestart.State.PeerRestarting {
+ go connect()
+ }
case <-fsm.getActiveCh:
timer.Reset(time.Duration(r.Intn(MIN_CONNECT_RETRY)+MIN_CONNECT_RETRY) * time.Second)
}