summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/sentry/fs/ramfs/dir.go23
-rw-r--r--pkg/sentry/fs/tmpfs/tmpfs.go6
-rwxr-xr-xpkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go3
-rwxr-xr-xpkg/sentry/platform/ring0/defs_impl.go4
-rw-r--r--pkg/sentry/socket/epsocket/epsocket.go131
-rwxr-xr-xpkg/sentry/time/seqatomic_parameters_unsafe.go3
-rw-r--r--pkg/tcpip/stack/stack.go30
-rw-r--r--pkg/tcpip/tcpip.go8
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go56
-rw-r--r--pkg/tcpip/transport/tcp/rcv.go27
-rw-r--r--pkg/tcpip/transport/tcp/sack_scoreboard.go4
-rw-r--r--pkg/tcpip/transport/tcp/snd.go5
-rwxr-xr-xpkg/tcpip/transport/tcp/tcp_state_autogen.go6
13 files changed, 218 insertions, 88 deletions
diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go
index f3e984c24..78e082b8e 100644
--- a/pkg/sentry/fs/ramfs/dir.go
+++ b/pkg/sentry/fs/ramfs/dir.go
@@ -53,7 +53,6 @@ type Dir struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeIsDirAllocate `state:"nosave"`
fsutil.InodeIsDirTruncate `state:"nosave"`
- fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
fsutil.InodeNotMappable `state:"nosave"`
fsutil.InodeNotSocket `state:"nosave"`
@@ -84,7 +83,8 @@ type Dir struct {
var _ fs.InodeOperations = (*Dir)(nil)
-// NewDir returns a new Dir with the given contents and attributes.
+// NewDir returns a new Dir with the given contents and attributes. A reference
+// on each fs.Inode in the `contents` map will be donated to this Dir.
func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwner, perms fs.FilePermissions) *Dir {
d := &Dir{
InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, owner, perms, linux.RAMFS_MAGIC),
@@ -138,7 +138,7 @@ func (d *Dir) addChildLocked(ctx context.Context, name string, inode *fs.Inode)
d.NotifyModificationAndStatusChange(ctx)
}
-// AddChild adds a child to this dir.
+// AddChild adds a child to this dir, inheriting its reference.
func (d *Dir) AddChild(ctx context.Context, name string, inode *fs.Inode) {
d.mu.Lock()
defer d.mu.Unlock()
@@ -172,7 +172,9 @@ func (d *Dir) Children() ([]string, map[string]fs.DentAttr) {
return namesCopy, entriesCopy
}
-// removeChildLocked attempts to remove an entry from this directory.
+// removeChildLocked attempts to remove an entry from this directory. It
+// returns the removed fs.Inode along with its reference, which callers are
+// responsible for decrementing.
func (d *Dir) removeChildLocked(ctx context.Context, name string) (*fs.Inode, error) {
inode, ok := d.children[name]
if !ok {
@@ -253,7 +255,8 @@ func (d *Dir) RemoveDirectory(ctx context.Context, _ *fs.Inode, name string) err
return nil
}
-// Lookup loads an inode at p into a Dirent.
+// Lookup loads an inode at p into a Dirent. It returns the fs.Dirent along
+// with a reference.
func (d *Dir) Lookup(ctx context.Context, _ *fs.Inode, p string) (*fs.Dirent, error) {
if len(p) > linux.NAME_MAX {
return nil, syserror.ENAMETOOLONG
@@ -408,6 +411,16 @@ func (*Dir) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, ol
return Rename(ctx, oldParent.InodeOperations, oldName, newParent.InodeOperations, newName, replacement)
}
+// Release implements fs.InodeOperation.Release.
+func (d *Dir) Release(_ context.Context) {
+ // Drop references on all children.
+ d.mu.Lock()
+ for _, i := range d.children {
+ i.DecRef()
+ }
+ d.mu.Unlock()
+}
+
// dirFileOperations implements fs.FileOperations for a ramfs directory.
//
// +stateify savable
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index 0f4497cd6..159fb7c08 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -56,7 +56,6 @@ func rename(ctx context.Context, oldParent *fs.Inode, oldName string, newParent
type Dir struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeIsDirTruncate `state:"nosave"`
- fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
fsutil.InodeNotMappable `state:"nosave"`
fsutil.InodeNotSocket `state:"nosave"`
@@ -252,6 +251,11 @@ func (d *Dir) Allocate(ctx context.Context, node *fs.Inode, offset, length int64
return d.ramfsDir.Allocate(ctx, node, offset, length)
}
+// Release implements fs.InodeOperations.Release.
+func (d *Dir) Release(ctx context.Context) {
+ d.ramfsDir.Release(ctx)
+}
+
// Symlink is a symlink.
//
// +stateify savable
diff --git a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go
index be6b07629..25ad17a4e 100755
--- a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go
+++ b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go
@@ -2,10 +2,11 @@ package kernel
import (
"fmt"
- "gvisor.dev/gvisor/third_party/gvsync"
"reflect"
"strings"
"unsafe"
+
+ "gvisor.dev/gvisor/third_party/gvsync"
)
// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race
diff --git a/pkg/sentry/platform/ring0/defs_impl.go b/pkg/sentry/platform/ring0/defs_impl.go
index a36a17e37..d4bfc5a4a 100755
--- a/pkg/sentry/platform/ring0/defs_impl.go
+++ b/pkg/sentry/platform/ring0/defs_impl.go
@@ -1,14 +1,14 @@
package ring0
import (
- "gvisor.dev/gvisor/pkg/cpuid"
- "reflect"
"syscall"
"fmt"
+ "gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/usermem"
"io"
+ "reflect"
)
var (
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go
index 0f483faa8..586523d3d 100644
--- a/pkg/sentry/socket/epsocket/epsocket.go
+++ b/pkg/sentry/socket/epsocket/epsocket.go
@@ -845,6 +845,68 @@ func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family
return nil, syserr.ErrProtocolNotAvailable
}
+func toLinuxTCPInfo(i tcp.InfoOption) linux.TCPInfo {
+ // Unimplemented fields are explicitly initialized to zero below.
+ return linux.TCPInfo{
+ State: uint8(translateTCPState(tcp.EndpointState(i.ProtocolState))),
+ CaState: 0,
+ Retransmits: 0,
+ Probes: 0,
+ Backoff: 0,
+ Options: 0,
+ WindowScale: uint8((i.Sender.SndWndScale&0xf)<<4 | (i.Receiver.RcvWndScale & 0xf)),
+ DeliveryRateAppLimited: 0,
+
+ RTO: uint32(i.Sender.RTO / time.Microsecond),
+ ATO: 0,
+ SndMss: uint32(i.Sender.MSS),
+ RcvMss: uint32(i.RcvMSS),
+
+ Unacked: uint32(i.Sender.Outstanding),
+ Sacked: uint32(i.SACK.Sacked),
+ Lost: 0,
+ Retrans: 0,
+ Fackets: 0,
+
+ LastDataSent: uint32(i.Sender.LastSendTime.UnixNano() / int64(time.Millisecond)),
+ LastAckSent: 0, // Not tracked by Linux.
+ LastDataRecv: uint32(i.RcvLastDataNanos / int64(time.Millisecond)),
+ LastAckRecv: uint32(i.RcvLastAckNanos / int64(time.Millisecond)),
+
+ PMTU: uint32(i.SndMTU),
+ RcvSsthresh: 0,
+ RTT: uint32(i.Sender.SRTT / time.Microsecond),
+ RTTVar: uint32(i.Sender.RTTVar / time.Microsecond),
+ SndSsthresh: uint32(i.Sender.Ssthresh),
+ SndCwnd: uint32(i.Sender.SndCwnd),
+ Advmss: uint32(i.AMSS),
+ Reordering: 0,
+
+ RcvRTT: uint32(i.RcvAutoParams.RTT / time.Microsecond),
+ RcvSpace: uint32(i.RcvBufSize),
+
+ TotalRetrans: 0,
+
+ PacingRate: 0,
+ MaxPacingRate: 0,
+ BytesAcked: 0,
+ BytesReceived: 0,
+ SegsOut: 0,
+ SegsIn: 0,
+
+ NotSentBytes: 0,
+ MinRTT: uint32(i.RcvAutoParams.RTT / time.Microsecond),
+ DataSegsIn: 0,
+ DataSegsOut: 0,
+
+ DeliveryRate: 0,
+
+ BusyTime: 0,
+ RwndLimited: 0,
+ SndBufLimited: 0,
+ }
+}
+
// getSockOptTCP implements GetSockOpt when level is SOL_TCP.
func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interface{}, *syserr.Error) {
switch name {
@@ -924,17 +986,14 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
return int32(time.Duration(v) / time.Second), nil
case linux.TCP_INFO:
- var v tcpip.TCPInfoOption
+ var v tcp.InfoOption
if err := ep.GetSockOpt(&v); err != nil {
return nil, syserr.TranslateNetstackError(err)
}
-
- // TODO(b/64800844): Translate fields once they are added to
- // tcpip.TCPInfoOption.
- info := linux.TCPInfo{}
+ info := toLinuxTCPInfo(v)
// Linux truncates the output binary to outLen.
- ib := binary.Marshal(nil, usermem.ByteOrder, &info)
+ ib := binary.Marshal(nil, usermem.ByteOrder, info)
if len(ib) > outLen {
ib = ib[:outLen]
}
@@ -2375,6 +2434,38 @@ func nicStateFlagsToLinux(f stack.NICStateFlags) uint32 {
return rv
}
+// translateTCPState translates an internal endpoint state to the equivalent
+// state in the Linux ABI.
+func translateTCPState(s tcp.EndpointState) uint32 {
+ switch s {
+ case tcp.StateEstablished:
+ return linux.TCP_ESTABLISHED
+ case tcp.StateSynSent:
+ return linux.TCP_SYN_SENT
+ case tcp.StateSynRecv:
+ return linux.TCP_SYN_RECV
+ case tcp.StateFinWait1:
+ return linux.TCP_FIN_WAIT1
+ case tcp.StateFinWait2:
+ return linux.TCP_FIN_WAIT2
+ case tcp.StateTimeWait:
+ return linux.TCP_TIME_WAIT
+ case tcp.StateClose, tcp.StateInitial, tcp.StateBound, tcp.StateConnecting, tcp.StateError:
+ return linux.TCP_CLOSE
+ case tcp.StateCloseWait:
+ return linux.TCP_CLOSE_WAIT
+ case tcp.StateLastAck:
+ return linux.TCP_LAST_ACK
+ case tcp.StateListen:
+ return linux.TCP_LISTEN
+ case tcp.StateClosing:
+ return linux.TCP_CLOSING
+ default:
+ // Internal or unknown state.
+ return 0
+ }
+}
+
// State implements socket.Socket.State. State translates the internal state
// returned by netstack to values defined by Linux.
func (s *SocketOperations) State() uint32 {
@@ -2385,33 +2476,7 @@ func (s *SocketOperations) State() uint32 {
if !s.isPacketBased() {
// TCP socket.
- switch tcp.EndpointState(s.Endpoint.State()) {
- case tcp.StateEstablished:
- return linux.TCP_ESTABLISHED
- case tcp.StateSynSent:
- return linux.TCP_SYN_SENT
- case tcp.StateSynRecv:
- return linux.TCP_SYN_RECV
- case tcp.StateFinWait1:
- return linux.TCP_FIN_WAIT1
- case tcp.StateFinWait2:
- return linux.TCP_FIN_WAIT2
- case tcp.StateTimeWait:
- return linux.TCP_TIME_WAIT
- case tcp.StateClose, tcp.StateInitial, tcp.StateBound, tcp.StateConnecting, tcp.StateError:
- return linux.TCP_CLOSE
- case tcp.StateCloseWait:
- return linux.TCP_CLOSE_WAIT
- case tcp.StateLastAck:
- return linux.TCP_LAST_ACK
- case tcp.StateListen:
- return linux.TCP_LISTEN
- case tcp.StateClosing:
- return linux.TCP_CLOSING
- default:
- // Internal or unknown state.
- return 0
- }
+ return translateTCPState(tcp.EndpointState(s.Endpoint.State()))
}
// TODO(b/112063468): Export states for UDP, ICMP, and raw sockets.
diff --git a/pkg/sentry/time/seqatomic_parameters_unsafe.go b/pkg/sentry/time/seqatomic_parameters_unsafe.go
index b4fb0a7f0..89792c56d 100755
--- a/pkg/sentry/time/seqatomic_parameters_unsafe.go
+++ b/pkg/sentry/time/seqatomic_parameters_unsafe.go
@@ -2,10 +2,11 @@ package time
import (
"fmt"
- "gvisor.dev/gvisor/third_party/gvsync"
"reflect"
"strings"
"unsafe"
+
+ "gvisor.dev/gvisor/third_party/gvsync"
)
// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 6156c3f46..7c31cf493 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -208,6 +208,10 @@ type TCPSenderState struct {
// Cubic holds the state related to CUBIC congestion control.
Cubic TCPCubicState
+
+ // MSS is the size of the largest segment that can be sent without
+ // fragmentation.
+ MSS int
}
// TCPSACKInfo holds TCP SACK related information for a given TCP endpoint.
@@ -220,6 +224,9 @@ type TCPSACKInfo struct {
// from the peer endpoint.
ReceivedBlocks []header.SACKBlock
+ // Sacked is the current number of bytes held in the SACK scoreboard.
+ Sacked seqnum.Size
+
// MaxSACKED is the highest sequence number that has been SACKED
// by the peer.
MaxSACKED seqnum.Value
@@ -269,6 +276,14 @@ type TCPEndpointState struct {
// ID is a copy of the TransportEndpointID for the endpoint.
ID TCPEndpointID
+ // ProtocolState denotes the TCP state the endpoint is currently
+ // in, encoded in a netstack-specific manner. Should be translated
+ // to the Linux ABI before exposing to userspace.
+ ProtocolState uint32
+
+ // AMSS is the MSS advertised to the peer by this endpoint.
+ AMSS uint16
+
// SegTime denotes the absolute time when this segment was received.
SegTime time.Time
@@ -286,6 +301,18 @@ type TCPEndpointState struct {
// RcvClosed if true, indicates the endpoint has been closed for reading.
RcvClosed bool
+ // RcvLastAck is the time of receipt of the last packet with the
+ // ACK flag set.
+ RcvLastAckNanos int64
+
+ // RcvLastData is the time of reciept of the last packet
+ // containing data.
+ RcvLastDataNanos int64
+
+ // RcvMSS is the size of the largest segment the receiver is willing to
+ // accept, not including TCP headers and options.
+ RcvMSS int
+
// SendTSOk is used to indicate when the TS Option has been negotiated.
// When sendTSOk is true every non-RST segment should carry a TS as per
// RFC7323#section-1.1.
@@ -326,6 +353,9 @@ type TCPEndpointState struct {
// SndMTU is the smallest MTU seen in the control packets received.
SndMTU int
+ // MaxOptionSize is the maximum size of TCP options.
+ MaxOptionSize int
+
// Receiver holds variables related to the TCP receiver for the endpoint.
Receiver TCPReceiverState
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 4208c0303..690c00edb 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -476,14 +476,6 @@ type QuickAckOption int
// Only supported on Unix sockets.
type PasscredOption int
-// TCPInfoOption is used by GetSockOpt to expose TCP statistics.
-//
-// TODO(b/64800844): Add and populate stat fields.
-type TCPInfoOption struct {
- RTT time.Duration
- RTTVar time.Duration
-}
-
// KeepaliveEnabledOption is used by SetSockOpt/GetSockOpt to specify whether
// TCP keepalive is enabled for this socket.
type KeepaliveEnabledOption int
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index cc49c8272..e94307bd5 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -108,6 +108,9 @@ func (s EndpointState) String() string {
}
}
+// InfoOption is used by GetSockOpt to expose TCP endpoint state.
+type InfoOption stack.TCPEndpointState
+
// Reasons for notifying the protocol goroutine.
const (
notifyNonZeroReceiveWindow = 1 << iota
@@ -202,12 +205,14 @@ type endpoint struct {
// to indicate to users that no more data is coming.
//
// rcvListMu can be taken after the endpoint mu below.
- rcvListMu sync.Mutex `state:"nosave"`
- rcvList segmentList `state:"wait"`
- rcvClosed bool
- rcvBufSize int
- rcvBufUsed int
- rcvAutoParams rcvBufAutoTuneParams
+ rcvListMu sync.Mutex `state:"nosave"`
+ rcvList segmentList `state:"wait"`
+ rcvClosed bool
+ rcvBufSize int
+ rcvBufUsed int
+ rcvAutoParams rcvBufAutoTuneParams
+ rcvLastAckNanos int64 // timestamp
+ rcvLastDataNanos int64 // timestamp
// zeroWindow indicates that the window was closed due to receive buffer
// space being filled up. This is set by the worker goroutine before
// moving a segment to the rcvList. This setting is cleared by the
@@ -1198,17 +1203,10 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
}
return nil
- case *tcpip.TCPInfoOption:
- *o = tcpip.TCPInfoOption{}
- e.mu.RLock()
- snd := e.snd
- e.mu.RUnlock()
- if snd != nil {
- snd.rtt.Lock()
- o.RTT = snd.rtt.srtt
- o.RTTVar = snd.rtt.rttvar
- snd.rtt.Unlock()
- }
+ case *InfoOption:
+ e.workMu.Lock()
+ *o = InfoOption(e.completeState())
+ e.workMu.Unlock()
return nil
case *tcpip.KeepaliveEnabledOption:
@@ -1933,22 +1931,27 @@ func (e *endpoint) maxOptionSize() (size int) {
}
// completeState makes a full copy of the endpoint and returns it. This is used
-// before invoking the probe. The state returned may not be fully consistent if
-// there are intervening syscalls when the state is being copied.
+// before invoking the probe and for getsockopt(TCP_INFO). The state returned
+// may not be fully consistent if there are intervening syscalls when the state
+// is being copied.
func (e *endpoint) completeState() stack.TCPEndpointState {
var s stack.TCPEndpointState
s.SegTime = time.Now()
- // Copy EndpointID.
- e.mu.Lock()
+ e.mu.RLock()
s.ID = stack.TCPEndpointID(e.id)
- e.mu.Unlock()
+ s.ProtocolState = uint32(e.state)
+ s.AMSS = e.amss
+ s.RcvMSS = int(e.amss) - e.maxOptionSize()
+ e.mu.RUnlock()
// Copy endpoint rcv state.
e.rcvListMu.Lock()
s.RcvBufSize = e.rcvBufSize
s.RcvBufUsed = e.rcvBufUsed
s.RcvClosed = e.rcvClosed
+ s.RcvLastAckNanos = e.rcvLastAckNanos
+ s.RcvLastDataNanos = e.rcvLastDataNanos
s.RcvAutoParams.MeasureTime = e.rcvAutoParams.measureTime
s.RcvAutoParams.CopiedBytes = e.rcvAutoParams.copied
s.RcvAutoParams.PrevCopiedBytes = e.rcvAutoParams.prevCopied
@@ -1956,6 +1959,7 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
s.RcvAutoParams.RTTMeasureSeqNumber = e.rcvAutoParams.rttMeasureSeqNumber
s.RcvAutoParams.RTTMeasureTime = e.rcvAutoParams.rttMeasureTime
s.RcvAutoParams.Disabled = e.rcvAutoParams.disabled
+
e.rcvListMu.Unlock()
// Endpoint TCP Option state.
@@ -1965,7 +1969,7 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
s.SACKPermitted = e.sackPermitted
s.SACK.Blocks = make([]header.SACKBlock, e.sack.NumBlocks)
copy(s.SACK.Blocks, e.sack.Blocks[:e.sack.NumBlocks])
- s.SACK.ReceivedBlocks, s.SACK.MaxSACKED = e.scoreboard.Copy()
+ s.SACK.ReceivedBlocks, s.SACK.Sacked, s.SACK.MaxSACKED = e.scoreboard.Copy()
// Copy endpoint send state.
e.sndBufMu.Lock()
@@ -2009,12 +2013,14 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
RTTMeasureTime: e.snd.rttMeasureTime,
Closed: e.snd.closed,
RTO: e.snd.rto,
+ MSS: e.snd.mss,
MaxPayloadSize: e.snd.maxPayloadSize,
SndWndScale: e.snd.sndWndScale,
MaxSentAck: e.snd.maxSentAck,
}
e.snd.rtt.Lock()
s.Sender.SRTT = e.snd.rtt.srtt
+ s.Sender.RTTVar = e.snd.rtt.rttvar
s.Sender.SRTTInited = e.snd.rtt.srttInited
e.snd.rtt.Unlock()
@@ -2059,8 +2065,8 @@ func (e *endpoint) initGSO() {
// State implements tcpip.Endpoint.State. It exports the endpoint's protocol
// state for diagnostics.
func (e *endpoint) State() uint32 {
- e.mu.Lock()
- defer e.mu.Unlock()
+ e.mu.RLock()
+ defer e.mu.RUnlock()
return uint32(e.state)
}
diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go
index e90f9a7d9..a8f490c4a 100644
--- a/pkg/tcpip/transport/tcp/rcv.go
+++ b/pkg/tcpip/transport/tcp/rcv.go
@@ -220,25 +220,24 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum
return true
}
-// updateRTT updates the receiver RTT measurement based on the sequence number
-// of the received segment.
-func (r *receiver) updateRTT() {
+// updateRTTLocked updates the receiver RTT measurement based on the sequence
+// number of the received segment.
+//
+// Precondition: Caller must hold r.ep.rcvListMu.
+func (r *receiver) updateRTTLocked() {
// From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf
//
// A system that is only transmitting acknowledgements can still
// estimate the round-trip time by observing the time between when a byte
// is first acknowledged and the receipt of data that is at least one
// window beyond the sequence number that was acknowledged.
- r.ep.rcvListMu.Lock()
if r.ep.rcvAutoParams.rttMeasureTime.IsZero() {
// New measurement.
r.ep.rcvAutoParams.rttMeasureTime = time.Now()
r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd)
- r.ep.rcvListMu.Unlock()
return
}
if r.rcvNxt.LessThan(r.ep.rcvAutoParams.rttMeasureSeqNumber) {
- r.ep.rcvListMu.Unlock()
return
}
rtt := time.Since(r.ep.rcvAutoParams.rttMeasureTime)
@@ -250,7 +249,6 @@ func (r *receiver) updateRTT() {
}
r.ep.rcvAutoParams.rttMeasureTime = time.Now()
r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd)
- r.ep.rcvListMu.Unlock()
}
// handleRcvdSegment handles TCP segments directed at the connection managed by
@@ -291,11 +289,20 @@ func (r *receiver) handleRcvdSegment(s *segment) {
return
}
- // Since we consumed a segment update the receiver's RTT estimate
- // if required.
+ r.ep.rcvListMu.Lock()
+ // FIXME(b/137581805): Using the runtime clock here is incorrect as it
+ // doesn't account for potentially virtualized time.
+ now := time.Now().UnixNano()
+ if s.flagIsSet(header.TCPFlagAck) {
+ r.ep.rcvLastAckNanos = now
+ }
if segLen > 0 {
- r.updateRTT()
+ // Since we consumed a segment update the receiver's RTT estimate if
+ // required.
+ r.ep.rcvLastDataNanos = now
+ r.updateRTTLocked()
}
+ r.ep.rcvListMu.Unlock()
// By consuming the current segment, we may have filled a gap in the
// sequence number domain that allows pending segments to be consumed
diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard.go b/pkg/tcpip/transport/tcp/sack_scoreboard.go
index 7ef2df377..02e52a63b 100644
--- a/pkg/tcpip/transport/tcp/sack_scoreboard.go
+++ b/pkg/tcpip/transport/tcp/sack_scoreboard.go
@@ -208,12 +208,12 @@ func (s *SACKScoreboard) Delete(seq seqnum.Value) {
}
// Copy provides a copy of the SACK scoreboard.
-func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, maxSACKED seqnum.Value) {
+func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, sacked seqnum.Size, maxSACKED seqnum.Value) {
s.ranges.Ascend(func(i btree.Item) bool {
sackBlocks = append(sackBlocks, i.(header.SACKBlock))
return true
})
- return sackBlocks, s.maxSACKED
+ return sackBlocks, s.sacked, s.maxSACKED
}
// IsRangeLost implements the IsLost(SeqNum) operation defined in RFC 6675
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index 0fee7ab72..daf28a49a 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -124,6 +124,10 @@ type sender struct {
rtt rtt
rto time.Duration
+ // mss is the largest segment that can be sent without fragmentation.
+ // Initialized when then sender is created, read-only afterwards.
+ mss int
+
// maxPayloadSize is the maximum size of the payload of a given segment.
// It is initialized on demand.
maxPayloadSize int
@@ -201,6 +205,7 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint
rto: 1 * time.Second,
rttMeasureSeqNum: iss + 1,
lastSendTime: time.Now(),
+ mss: int(mss),
maxPayloadSize: maxPayloadSize,
maxSentAck: irs + 1,
fr: fastRecovery{
diff --git a/pkg/tcpip/transport/tcp/tcp_state_autogen.go b/pkg/tcpip/transport/tcp/tcp_state_autogen.go
index 159444cc2..c62060ef2 100755
--- a/pkg/tcpip/transport/tcp/tcp_state_autogen.go
+++ b/pkg/tcpip/transport/tcp/tcp_state_autogen.go
@@ -92,6 +92,8 @@ func (x *endpoint) save(m state.Map) {
m.Save("rcvBufSize", &x.rcvBufSize)
m.Save("rcvBufUsed", &x.rcvBufUsed)
m.Save("rcvAutoParams", &x.rcvAutoParams)
+ m.Save("rcvLastAckNanos", &x.rcvLastAckNanos)
+ m.Save("rcvLastDataNanos", &x.rcvLastDataNanos)
m.Save("zeroWindow", &x.zeroWindow)
m.Save("id", &x.id)
m.Save("isRegistered", &x.isRegistered)
@@ -140,6 +142,8 @@ func (x *endpoint) load(m state.Map) {
m.Load("rcvBufSize", &x.rcvBufSize)
m.Load("rcvBufUsed", &x.rcvBufUsed)
m.Load("rcvAutoParams", &x.rcvAutoParams)
+ m.Load("rcvLastAckNanos", &x.rcvLastAckNanos)
+ m.Load("rcvLastDataNanos", &x.rcvLastDataNanos)
m.Load("zeroWindow", &x.zeroWindow)
m.Load("id", &x.id)
m.Load("isRegistered", &x.isRegistered)
@@ -337,6 +341,7 @@ func (x *sender) save(m state.Map) {
m.Save("writeList", &x.writeList)
m.Save("rtt", &x.rtt)
m.Save("rto", &x.rto)
+ m.Save("mss", &x.mss)
m.Save("maxPayloadSize", &x.maxPayloadSize)
m.Save("gso", &x.gso)
m.Save("sndWndScale", &x.sndWndScale)
@@ -362,6 +367,7 @@ func (x *sender) load(m state.Map) {
m.Load("writeList", &x.writeList)
m.Load("rtt", &x.rtt)
m.Load("rto", &x.rto)
+ m.Load("mss", &x.mss)
m.Load("maxPayloadSize", &x.maxPayloadSize)
m.Load("gso", &x.gso)
m.Load("sndWndScale", &x.sndWndScale)