diff options
author | gVisor bot <gvisor-bot@google.com> | 2019-08-01 21:34:15 +0000 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2019-08-01 21:34:15 +0000 |
commit | fad75173249339f0cf84e5bcdef57e029d40af87 (patch) | |
tree | ca49643f09f6edd080b60e5eeca98aeccef76569 /pkg | |
parent | 3d89ab8d2a232f8a72c0ace4c37a638b0e9b6f9a (diff) | |
parent | f2b25aeac7b5ba44144d003fbb2ae657461b8e9b (diff) |
Merge f2b25aea (automated)
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/sentry/fs/ramfs/dir.go | 23 | ||||
-rw-r--r-- | pkg/sentry/fs/tmpfs/tmpfs.go | 6 | ||||
-rwxr-xr-x | pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go | 3 | ||||
-rwxr-xr-x | pkg/sentry/platform/ring0/defs_impl.go | 4 | ||||
-rw-r--r-- | pkg/sentry/socket/epsocket/epsocket.go | 131 | ||||
-rwxr-xr-x | pkg/sentry/time/seqatomic_parameters_unsafe.go | 3 | ||||
-rw-r--r-- | pkg/tcpip/stack/stack.go | 30 | ||||
-rw-r--r-- | pkg/tcpip/tcpip.go | 8 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/endpoint.go | 56 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/rcv.go | 27 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/sack_scoreboard.go | 4 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/snd.go | 5 | ||||
-rwxr-xr-x | pkg/tcpip/transport/tcp/tcp_state_autogen.go | 6 |
13 files changed, 218 insertions, 88 deletions
diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go index f3e984c24..78e082b8e 100644 --- a/pkg/sentry/fs/ramfs/dir.go +++ b/pkg/sentry/fs/ramfs/dir.go @@ -53,7 +53,6 @@ type Dir struct { fsutil.InodeGenericChecker `state:"nosave"` fsutil.InodeIsDirAllocate `state:"nosave"` fsutil.InodeIsDirTruncate `state:"nosave"` - fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` fsutil.InodeNotMappable `state:"nosave"` fsutil.InodeNotSocket `state:"nosave"` @@ -84,7 +83,8 @@ type Dir struct { var _ fs.InodeOperations = (*Dir)(nil) -// NewDir returns a new Dir with the given contents and attributes. +// NewDir returns a new Dir with the given contents and attributes. A reference +// on each fs.Inode in the `contents` map will be donated to this Dir. func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwner, perms fs.FilePermissions) *Dir { d := &Dir{ InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, owner, perms, linux.RAMFS_MAGIC), @@ -138,7 +138,7 @@ func (d *Dir) addChildLocked(ctx context.Context, name string, inode *fs.Inode) d.NotifyModificationAndStatusChange(ctx) } -// AddChild adds a child to this dir. +// AddChild adds a child to this dir, inheriting its reference. func (d *Dir) AddChild(ctx context.Context, name string, inode *fs.Inode) { d.mu.Lock() defer d.mu.Unlock() @@ -172,7 +172,9 @@ func (d *Dir) Children() ([]string, map[string]fs.DentAttr) { return namesCopy, entriesCopy } -// removeChildLocked attempts to remove an entry from this directory. +// removeChildLocked attempts to remove an entry from this directory. It +// returns the removed fs.Inode along with its reference, which callers are +// responsible for decrementing. func (d *Dir) removeChildLocked(ctx context.Context, name string) (*fs.Inode, error) { inode, ok := d.children[name] if !ok { @@ -253,7 +255,8 @@ func (d *Dir) RemoveDirectory(ctx context.Context, _ *fs.Inode, name string) err return nil } -// Lookup loads an inode at p into a Dirent. +// Lookup loads an inode at p into a Dirent. It returns the fs.Dirent along +// with a reference. func (d *Dir) Lookup(ctx context.Context, _ *fs.Inode, p string) (*fs.Dirent, error) { if len(p) > linux.NAME_MAX { return nil, syserror.ENAMETOOLONG @@ -408,6 +411,16 @@ func (*Dir) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, ol return Rename(ctx, oldParent.InodeOperations, oldName, newParent.InodeOperations, newName, replacement) } +// Release implements fs.InodeOperation.Release. +func (d *Dir) Release(_ context.Context) { + // Drop references on all children. + d.mu.Lock() + for _, i := range d.children { + i.DecRef() + } + d.mu.Unlock() +} + // dirFileOperations implements fs.FileOperations for a ramfs directory. // // +stateify savable diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index 0f4497cd6..159fb7c08 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -56,7 +56,6 @@ func rename(ctx context.Context, oldParent *fs.Inode, oldName string, newParent type Dir struct { fsutil.InodeGenericChecker `state:"nosave"` fsutil.InodeIsDirTruncate `state:"nosave"` - fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` fsutil.InodeNotMappable `state:"nosave"` fsutil.InodeNotSocket `state:"nosave"` @@ -252,6 +251,11 @@ func (d *Dir) Allocate(ctx context.Context, node *fs.Inode, offset, length int64 return d.ramfsDir.Allocate(ctx, node, offset, length) } +// Release implements fs.InodeOperations.Release. +func (d *Dir) Release(ctx context.Context) { + d.ramfsDir.Release(ctx) +} + // Symlink is a symlink. // // +stateify savable diff --git a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go index be6b07629..25ad17a4e 100755 --- a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go +++ b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go @@ -2,10 +2,11 @@ package kernel import ( "fmt" - "gvisor.dev/gvisor/third_party/gvsync" "reflect" "strings" "unsafe" + + "gvisor.dev/gvisor/third_party/gvsync" ) // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race diff --git a/pkg/sentry/platform/ring0/defs_impl.go b/pkg/sentry/platform/ring0/defs_impl.go index a36a17e37..d4bfc5a4a 100755 --- a/pkg/sentry/platform/ring0/defs_impl.go +++ b/pkg/sentry/platform/ring0/defs_impl.go @@ -1,14 +1,14 @@ package ring0 import ( - "gvisor.dev/gvisor/pkg/cpuid" - "reflect" "syscall" "fmt" + "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/usermem" "io" + "reflect" ) var ( diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go index 0f483faa8..586523d3d 100644 --- a/pkg/sentry/socket/epsocket/epsocket.go +++ b/pkg/sentry/socket/epsocket/epsocket.go @@ -845,6 +845,68 @@ func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family return nil, syserr.ErrProtocolNotAvailable } +func toLinuxTCPInfo(i tcp.InfoOption) linux.TCPInfo { + // Unimplemented fields are explicitly initialized to zero below. + return linux.TCPInfo{ + State: uint8(translateTCPState(tcp.EndpointState(i.ProtocolState))), + CaState: 0, + Retransmits: 0, + Probes: 0, + Backoff: 0, + Options: 0, + WindowScale: uint8((i.Sender.SndWndScale&0xf)<<4 | (i.Receiver.RcvWndScale & 0xf)), + DeliveryRateAppLimited: 0, + + RTO: uint32(i.Sender.RTO / time.Microsecond), + ATO: 0, + SndMss: uint32(i.Sender.MSS), + RcvMss: uint32(i.RcvMSS), + + Unacked: uint32(i.Sender.Outstanding), + Sacked: uint32(i.SACK.Sacked), + Lost: 0, + Retrans: 0, + Fackets: 0, + + LastDataSent: uint32(i.Sender.LastSendTime.UnixNano() / int64(time.Millisecond)), + LastAckSent: 0, // Not tracked by Linux. + LastDataRecv: uint32(i.RcvLastDataNanos / int64(time.Millisecond)), + LastAckRecv: uint32(i.RcvLastAckNanos / int64(time.Millisecond)), + + PMTU: uint32(i.SndMTU), + RcvSsthresh: 0, + RTT: uint32(i.Sender.SRTT / time.Microsecond), + RTTVar: uint32(i.Sender.RTTVar / time.Microsecond), + SndSsthresh: uint32(i.Sender.Ssthresh), + SndCwnd: uint32(i.Sender.SndCwnd), + Advmss: uint32(i.AMSS), + Reordering: 0, + + RcvRTT: uint32(i.RcvAutoParams.RTT / time.Microsecond), + RcvSpace: uint32(i.RcvBufSize), + + TotalRetrans: 0, + + PacingRate: 0, + MaxPacingRate: 0, + BytesAcked: 0, + BytesReceived: 0, + SegsOut: 0, + SegsIn: 0, + + NotSentBytes: 0, + MinRTT: uint32(i.RcvAutoParams.RTT / time.Microsecond), + DataSegsIn: 0, + DataSegsOut: 0, + + DeliveryRate: 0, + + BusyTime: 0, + RwndLimited: 0, + SndBufLimited: 0, + } +} + // getSockOptTCP implements GetSockOpt when level is SOL_TCP. func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interface{}, *syserr.Error) { switch name { @@ -924,17 +986,14 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa return int32(time.Duration(v) / time.Second), nil case linux.TCP_INFO: - var v tcpip.TCPInfoOption + var v tcp.InfoOption if err := ep.GetSockOpt(&v); err != nil { return nil, syserr.TranslateNetstackError(err) } - - // TODO(b/64800844): Translate fields once they are added to - // tcpip.TCPInfoOption. - info := linux.TCPInfo{} + info := toLinuxTCPInfo(v) // Linux truncates the output binary to outLen. - ib := binary.Marshal(nil, usermem.ByteOrder, &info) + ib := binary.Marshal(nil, usermem.ByteOrder, info) if len(ib) > outLen { ib = ib[:outLen] } @@ -2375,6 +2434,38 @@ func nicStateFlagsToLinux(f stack.NICStateFlags) uint32 { return rv } +// translateTCPState translates an internal endpoint state to the equivalent +// state in the Linux ABI. +func translateTCPState(s tcp.EndpointState) uint32 { + switch s { + case tcp.StateEstablished: + return linux.TCP_ESTABLISHED + case tcp.StateSynSent: + return linux.TCP_SYN_SENT + case tcp.StateSynRecv: + return linux.TCP_SYN_RECV + case tcp.StateFinWait1: + return linux.TCP_FIN_WAIT1 + case tcp.StateFinWait2: + return linux.TCP_FIN_WAIT2 + case tcp.StateTimeWait: + return linux.TCP_TIME_WAIT + case tcp.StateClose, tcp.StateInitial, tcp.StateBound, tcp.StateConnecting, tcp.StateError: + return linux.TCP_CLOSE + case tcp.StateCloseWait: + return linux.TCP_CLOSE_WAIT + case tcp.StateLastAck: + return linux.TCP_LAST_ACK + case tcp.StateListen: + return linux.TCP_LISTEN + case tcp.StateClosing: + return linux.TCP_CLOSING + default: + // Internal or unknown state. + return 0 + } +} + // State implements socket.Socket.State. State translates the internal state // returned by netstack to values defined by Linux. func (s *SocketOperations) State() uint32 { @@ -2385,33 +2476,7 @@ func (s *SocketOperations) State() uint32 { if !s.isPacketBased() { // TCP socket. - switch tcp.EndpointState(s.Endpoint.State()) { - case tcp.StateEstablished: - return linux.TCP_ESTABLISHED - case tcp.StateSynSent: - return linux.TCP_SYN_SENT - case tcp.StateSynRecv: - return linux.TCP_SYN_RECV - case tcp.StateFinWait1: - return linux.TCP_FIN_WAIT1 - case tcp.StateFinWait2: - return linux.TCP_FIN_WAIT2 - case tcp.StateTimeWait: - return linux.TCP_TIME_WAIT - case tcp.StateClose, tcp.StateInitial, tcp.StateBound, tcp.StateConnecting, tcp.StateError: - return linux.TCP_CLOSE - case tcp.StateCloseWait: - return linux.TCP_CLOSE_WAIT - case tcp.StateLastAck: - return linux.TCP_LAST_ACK - case tcp.StateListen: - return linux.TCP_LISTEN - case tcp.StateClosing: - return linux.TCP_CLOSING - default: - // Internal or unknown state. - return 0 - } + return translateTCPState(tcp.EndpointState(s.Endpoint.State())) } // TODO(b/112063468): Export states for UDP, ICMP, and raw sockets. diff --git a/pkg/sentry/time/seqatomic_parameters_unsafe.go b/pkg/sentry/time/seqatomic_parameters_unsafe.go index b4fb0a7f0..89792c56d 100755 --- a/pkg/sentry/time/seqatomic_parameters_unsafe.go +++ b/pkg/sentry/time/seqatomic_parameters_unsafe.go @@ -2,10 +2,11 @@ package time import ( "fmt" - "gvisor.dev/gvisor/third_party/gvsync" "reflect" "strings" "unsafe" + + "gvisor.dev/gvisor/third_party/gvsync" ) // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 6156c3f46..7c31cf493 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -208,6 +208,10 @@ type TCPSenderState struct { // Cubic holds the state related to CUBIC congestion control. Cubic TCPCubicState + + // MSS is the size of the largest segment that can be sent without + // fragmentation. + MSS int } // TCPSACKInfo holds TCP SACK related information for a given TCP endpoint. @@ -220,6 +224,9 @@ type TCPSACKInfo struct { // from the peer endpoint. ReceivedBlocks []header.SACKBlock + // Sacked is the current number of bytes held in the SACK scoreboard. + Sacked seqnum.Size + // MaxSACKED is the highest sequence number that has been SACKED // by the peer. MaxSACKED seqnum.Value @@ -269,6 +276,14 @@ type TCPEndpointState struct { // ID is a copy of the TransportEndpointID for the endpoint. ID TCPEndpointID + // ProtocolState denotes the TCP state the endpoint is currently + // in, encoded in a netstack-specific manner. Should be translated + // to the Linux ABI before exposing to userspace. + ProtocolState uint32 + + // AMSS is the MSS advertised to the peer by this endpoint. + AMSS uint16 + // SegTime denotes the absolute time when this segment was received. SegTime time.Time @@ -286,6 +301,18 @@ type TCPEndpointState struct { // RcvClosed if true, indicates the endpoint has been closed for reading. RcvClosed bool + // RcvLastAck is the time of receipt of the last packet with the + // ACK flag set. + RcvLastAckNanos int64 + + // RcvLastData is the time of reciept of the last packet + // containing data. + RcvLastDataNanos int64 + + // RcvMSS is the size of the largest segment the receiver is willing to + // accept, not including TCP headers and options. + RcvMSS int + // SendTSOk is used to indicate when the TS Option has been negotiated. // When sendTSOk is true every non-RST segment should carry a TS as per // RFC7323#section-1.1. @@ -326,6 +353,9 @@ type TCPEndpointState struct { // SndMTU is the smallest MTU seen in the control packets received. SndMTU int + // MaxOptionSize is the maximum size of TCP options. + MaxOptionSize int + // Receiver holds variables related to the TCP receiver for the endpoint. Receiver TCPReceiverState diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 4208c0303..690c00edb 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -476,14 +476,6 @@ type QuickAckOption int // Only supported on Unix sockets. type PasscredOption int -// TCPInfoOption is used by GetSockOpt to expose TCP statistics. -// -// TODO(b/64800844): Add and populate stat fields. -type TCPInfoOption struct { - RTT time.Duration - RTTVar time.Duration -} - // KeepaliveEnabledOption is used by SetSockOpt/GetSockOpt to specify whether // TCP keepalive is enabled for this socket. type KeepaliveEnabledOption int diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index cc49c8272..e94307bd5 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -108,6 +108,9 @@ func (s EndpointState) String() string { } } +// InfoOption is used by GetSockOpt to expose TCP endpoint state. +type InfoOption stack.TCPEndpointState + // Reasons for notifying the protocol goroutine. const ( notifyNonZeroReceiveWindow = 1 << iota @@ -202,12 +205,14 @@ type endpoint struct { // to indicate to users that no more data is coming. // // rcvListMu can be taken after the endpoint mu below. - rcvListMu sync.Mutex `state:"nosave"` - rcvList segmentList `state:"wait"` - rcvClosed bool - rcvBufSize int - rcvBufUsed int - rcvAutoParams rcvBufAutoTuneParams + rcvListMu sync.Mutex `state:"nosave"` + rcvList segmentList `state:"wait"` + rcvClosed bool + rcvBufSize int + rcvBufUsed int + rcvAutoParams rcvBufAutoTuneParams + rcvLastAckNanos int64 // timestamp + rcvLastDataNanos int64 // timestamp // zeroWindow indicates that the window was closed due to receive buffer // space being filled up. This is set by the worker goroutine before // moving a segment to the rcvList. This setting is cleared by the @@ -1198,17 +1203,10 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { } return nil - case *tcpip.TCPInfoOption: - *o = tcpip.TCPInfoOption{} - e.mu.RLock() - snd := e.snd - e.mu.RUnlock() - if snd != nil { - snd.rtt.Lock() - o.RTT = snd.rtt.srtt - o.RTTVar = snd.rtt.rttvar - snd.rtt.Unlock() - } + case *InfoOption: + e.workMu.Lock() + *o = InfoOption(e.completeState()) + e.workMu.Unlock() return nil case *tcpip.KeepaliveEnabledOption: @@ -1933,22 +1931,27 @@ func (e *endpoint) maxOptionSize() (size int) { } // completeState makes a full copy of the endpoint and returns it. This is used -// before invoking the probe. The state returned may not be fully consistent if -// there are intervening syscalls when the state is being copied. +// before invoking the probe and for getsockopt(TCP_INFO). The state returned +// may not be fully consistent if there are intervening syscalls when the state +// is being copied. func (e *endpoint) completeState() stack.TCPEndpointState { var s stack.TCPEndpointState s.SegTime = time.Now() - // Copy EndpointID. - e.mu.Lock() + e.mu.RLock() s.ID = stack.TCPEndpointID(e.id) - e.mu.Unlock() + s.ProtocolState = uint32(e.state) + s.AMSS = e.amss + s.RcvMSS = int(e.amss) - e.maxOptionSize() + e.mu.RUnlock() // Copy endpoint rcv state. e.rcvListMu.Lock() s.RcvBufSize = e.rcvBufSize s.RcvBufUsed = e.rcvBufUsed s.RcvClosed = e.rcvClosed + s.RcvLastAckNanos = e.rcvLastAckNanos + s.RcvLastDataNanos = e.rcvLastDataNanos s.RcvAutoParams.MeasureTime = e.rcvAutoParams.measureTime s.RcvAutoParams.CopiedBytes = e.rcvAutoParams.copied s.RcvAutoParams.PrevCopiedBytes = e.rcvAutoParams.prevCopied @@ -1956,6 +1959,7 @@ func (e *endpoint) completeState() stack.TCPEndpointState { s.RcvAutoParams.RTTMeasureSeqNumber = e.rcvAutoParams.rttMeasureSeqNumber s.RcvAutoParams.RTTMeasureTime = e.rcvAutoParams.rttMeasureTime s.RcvAutoParams.Disabled = e.rcvAutoParams.disabled + e.rcvListMu.Unlock() // Endpoint TCP Option state. @@ -1965,7 +1969,7 @@ func (e *endpoint) completeState() stack.TCPEndpointState { s.SACKPermitted = e.sackPermitted s.SACK.Blocks = make([]header.SACKBlock, e.sack.NumBlocks) copy(s.SACK.Blocks, e.sack.Blocks[:e.sack.NumBlocks]) - s.SACK.ReceivedBlocks, s.SACK.MaxSACKED = e.scoreboard.Copy() + s.SACK.ReceivedBlocks, s.SACK.Sacked, s.SACK.MaxSACKED = e.scoreboard.Copy() // Copy endpoint send state. e.sndBufMu.Lock() @@ -2009,12 +2013,14 @@ func (e *endpoint) completeState() stack.TCPEndpointState { RTTMeasureTime: e.snd.rttMeasureTime, Closed: e.snd.closed, RTO: e.snd.rto, + MSS: e.snd.mss, MaxPayloadSize: e.snd.maxPayloadSize, SndWndScale: e.snd.sndWndScale, MaxSentAck: e.snd.maxSentAck, } e.snd.rtt.Lock() s.Sender.SRTT = e.snd.rtt.srtt + s.Sender.RTTVar = e.snd.rtt.rttvar s.Sender.SRTTInited = e.snd.rtt.srttInited e.snd.rtt.Unlock() @@ -2059,8 +2065,8 @@ func (e *endpoint) initGSO() { // State implements tcpip.Endpoint.State. It exports the endpoint's protocol // state for diagnostics. func (e *endpoint) State() uint32 { - e.mu.Lock() - defer e.mu.Unlock() + e.mu.RLock() + defer e.mu.RUnlock() return uint32(e.state) } diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index e90f9a7d9..a8f490c4a 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -220,25 +220,24 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum return true } -// updateRTT updates the receiver RTT measurement based on the sequence number -// of the received segment. -func (r *receiver) updateRTT() { +// updateRTTLocked updates the receiver RTT measurement based on the sequence +// number of the received segment. +// +// Precondition: Caller must hold r.ep.rcvListMu. +func (r *receiver) updateRTTLocked() { // From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf // // A system that is only transmitting acknowledgements can still // estimate the round-trip time by observing the time between when a byte // is first acknowledged and the receipt of data that is at least one // window beyond the sequence number that was acknowledged. - r.ep.rcvListMu.Lock() if r.ep.rcvAutoParams.rttMeasureTime.IsZero() { // New measurement. r.ep.rcvAutoParams.rttMeasureTime = time.Now() r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd) - r.ep.rcvListMu.Unlock() return } if r.rcvNxt.LessThan(r.ep.rcvAutoParams.rttMeasureSeqNumber) { - r.ep.rcvListMu.Unlock() return } rtt := time.Since(r.ep.rcvAutoParams.rttMeasureTime) @@ -250,7 +249,6 @@ func (r *receiver) updateRTT() { } r.ep.rcvAutoParams.rttMeasureTime = time.Now() r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd) - r.ep.rcvListMu.Unlock() } // handleRcvdSegment handles TCP segments directed at the connection managed by @@ -291,11 +289,20 @@ func (r *receiver) handleRcvdSegment(s *segment) { return } - // Since we consumed a segment update the receiver's RTT estimate - // if required. + r.ep.rcvListMu.Lock() + // FIXME(b/137581805): Using the runtime clock here is incorrect as it + // doesn't account for potentially virtualized time. + now := time.Now().UnixNano() + if s.flagIsSet(header.TCPFlagAck) { + r.ep.rcvLastAckNanos = now + } if segLen > 0 { - r.updateRTT() + // Since we consumed a segment update the receiver's RTT estimate if + // required. + r.ep.rcvLastDataNanos = now + r.updateRTTLocked() } + r.ep.rcvListMu.Unlock() // By consuming the current segment, we may have filled a gap in the // sequence number domain that allows pending segments to be consumed diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard.go b/pkg/tcpip/transport/tcp/sack_scoreboard.go index 7ef2df377..02e52a63b 100644 --- a/pkg/tcpip/transport/tcp/sack_scoreboard.go +++ b/pkg/tcpip/transport/tcp/sack_scoreboard.go @@ -208,12 +208,12 @@ func (s *SACKScoreboard) Delete(seq seqnum.Value) { } // Copy provides a copy of the SACK scoreboard. -func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, maxSACKED seqnum.Value) { +func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, sacked seqnum.Size, maxSACKED seqnum.Value) { s.ranges.Ascend(func(i btree.Item) bool { sackBlocks = append(sackBlocks, i.(header.SACKBlock)) return true }) - return sackBlocks, s.maxSACKED + return sackBlocks, s.sacked, s.maxSACKED } // IsRangeLost implements the IsLost(SeqNum) operation defined in RFC 6675 diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 0fee7ab72..daf28a49a 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -124,6 +124,10 @@ type sender struct { rtt rtt rto time.Duration + // mss is the largest segment that can be sent without fragmentation. + // Initialized when then sender is created, read-only afterwards. + mss int + // maxPayloadSize is the maximum size of the payload of a given segment. // It is initialized on demand. maxPayloadSize int @@ -201,6 +205,7 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint rto: 1 * time.Second, rttMeasureSeqNum: iss + 1, lastSendTime: time.Now(), + mss: int(mss), maxPayloadSize: maxPayloadSize, maxSentAck: irs + 1, fr: fastRecovery{ diff --git a/pkg/tcpip/transport/tcp/tcp_state_autogen.go b/pkg/tcpip/transport/tcp/tcp_state_autogen.go index 159444cc2..c62060ef2 100755 --- a/pkg/tcpip/transport/tcp/tcp_state_autogen.go +++ b/pkg/tcpip/transport/tcp/tcp_state_autogen.go @@ -92,6 +92,8 @@ func (x *endpoint) save(m state.Map) { m.Save("rcvBufSize", &x.rcvBufSize) m.Save("rcvBufUsed", &x.rcvBufUsed) m.Save("rcvAutoParams", &x.rcvAutoParams) + m.Save("rcvLastAckNanos", &x.rcvLastAckNanos) + m.Save("rcvLastDataNanos", &x.rcvLastDataNanos) m.Save("zeroWindow", &x.zeroWindow) m.Save("id", &x.id) m.Save("isRegistered", &x.isRegistered) @@ -140,6 +142,8 @@ func (x *endpoint) load(m state.Map) { m.Load("rcvBufSize", &x.rcvBufSize) m.Load("rcvBufUsed", &x.rcvBufUsed) m.Load("rcvAutoParams", &x.rcvAutoParams) + m.Load("rcvLastAckNanos", &x.rcvLastAckNanos) + m.Load("rcvLastDataNanos", &x.rcvLastDataNanos) m.Load("zeroWindow", &x.zeroWindow) m.Load("id", &x.id) m.Load("isRegistered", &x.isRegistered) @@ -337,6 +341,7 @@ func (x *sender) save(m state.Map) { m.Save("writeList", &x.writeList) m.Save("rtt", &x.rtt) m.Save("rto", &x.rto) + m.Save("mss", &x.mss) m.Save("maxPayloadSize", &x.maxPayloadSize) m.Save("gso", &x.gso) m.Save("sndWndScale", &x.sndWndScale) @@ -362,6 +367,7 @@ func (x *sender) load(m state.Map) { m.Load("writeList", &x.writeList) m.Load("rtt", &x.rtt) m.Load("rto", &x.rto) + m.Load("mss", &x.mss) m.Load("maxPayloadSize", &x.maxPayloadSize) m.Load("gso", &x.gso) m.Load("sndWndScale", &x.sndWndScale) |