summary | refs | log | tree | commit | diff | homepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r-- pkg/sentry/socket/epsocket/epsocket.go | 94
-rw-r--r-- pkg/tcpip/tcpip.go | 10
-rw-r--r-- pkg/tcpip/transport/ping/endpoint.go | 30
-rw-r--r-- pkg/tcpip/transport/udp/endpoint.go | 29
4 files changed, 69 insertions, 94 deletions
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go
index 5e4a269c6..3a9d1182f 100644
--- a/pkg/sentry/socket/epsocket/epsocket.go
+++ b/pkg/sentry/socket/epsocket/epsocket.go
@@ -150,11 +150,24 @@ type SocketOperations struct {
Endpoint tcpip.Endpoint
skType transport.SockType
- // readMu protects access to readView, control, and sender.
- readMu sync.Mutex `state:"nosave"`
+ // readMu protects access to the below fields.
+ readMu sync.Mutex `state:"nosave"`
+ // readView contains the remaining payload from the last packet.
readView buffer.View
- readCM tcpip.ControlMessages
- sender tcpip.FullAddress
+ // readCM holds control message information for the last packet read
+ // from Endpoint.
+ readCM tcpip.ControlMessages
+ sender tcpip.FullAddress
+ // sockOptTimestamp corresponds to SO_TIMESTAMP. When true, timestamps
+ // of returned messages can be returned via control messages. When
+ // false, the same timestamp is instead stored and can be read via the
+ // SIOCGSTAMP ioctl. See socket(7).
+ sockOptTimestamp bool
+ // timestampValid indicates whether timestamp has been set.
+ timestampValid bool
+ // timestampNS holds the timestamp to use with SIOCGSTAMP. It is only
+ // valid when timestampValid is true.
+ timestampNS int64
}
// New creates a new endpoint socket.
@@ -515,6 +528,24 @@ func (s *SocketOperations) Shutdown(t *kernel.Task, how int) *syserr.Error {
// GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
// tcpip.Endpoint.
func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name, outLen int) (interface{}, *syserr.Error) {
+ // TODO: Unlike other socket options, SO_TIMESTAMP is
+ // implemented specifically for epsocket.SocketOperations rather than
+ // commonEndpoint. commonEndpoint should be extended to support socket
+ // options where the implementation is not shared, as unix sockets need
+ // their own support for SO_TIMESTAMP.
+ if level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP {
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+ val := int32(0)
+ s.readMu.Lock()
+ defer s.readMu.Unlock()
+ if s.sockOptTimestamp {
+ val = 1
+ }
+ return val, nil
+ }
+
return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outLen)
}
@@ -680,18 +711,6 @@ func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family
return linux.NsecToTimeval(s.RecvTimeout()), nil
- case linux.SO_TIMESTAMP:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.TimestampOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- return int32(v), nil
-
case linux.SO_OOBINLINE:
if outLen < sizeOfInt32 {
return nil, syserr.ErrInvalidArgument
@@ -854,6 +873,21 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfac
// SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by
// tcpip.Endpoint.
func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error {
+ // TODO: Unlike other socket options, SO_TIMESTAMP is
+ // implemented specifically for epsocket.SocketOperations rather than
+ // commonEndpoint. commonEndpoint should be extended to support socket
+ // options where the implementation is not shared, as unix sockets need
+ // their own support for SO_TIMESTAMP.
+ if level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP {
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+ s.readMu.Lock()
+ defer s.readMu.Unlock()
+ s.sockOptTimestamp = usermem.ByteOrder.Uint32(optVal) != 0
+ return nil
+ }
+
return SetSockOpt(t, s, s.Endpoint, level, name, optVal)
}
@@ -962,14 +996,6 @@ func setSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name i
s.SetRecvTimeout(v.ToNsecCapped())
return nil
- case linux.SO_TIMESTAMP:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TimestampOption(v)))
-
default:
socket.SetSockOptEmitUnimplementedEvent(t, name)
}
@@ -1436,6 +1462,11 @@ func (s *SocketOperations) coalescingRead(ctx context.Context, dst usermem.IOSeq
}
} else {
n, e = dst.CopyOut(ctx, s.readView)
+ // Set the control message, even if 0 bytes were read.
+ if e == nil && s.readCM.HasTimestamp && s.sockOptTimestamp {
+ s.timestampNS = s.readCM.Timestamp
+ s.timestampValid = true
+ }
}
copied += n
s.readView.TrimFront(n)
@@ -1499,6 +1530,11 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe
}
n, err := dst.CopyOut(ctx, s.readView)
+ // Set the control message, even if 0 bytes were read.
+ if err == nil && s.readCM.HasTimestamp && s.sockOptTimestamp {
+ s.timestampNS = s.readCM.Timestamp
+ s.timestampValid = true
+ }
var addr interface{}
var addrLen uint32
if isPacket && senderRequested {
@@ -1508,11 +1544,11 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe
if peek {
if l := len(s.readView); trunc && l > n {
// isPacket must be true.
- return l, addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err)
+ return l, addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err)
}
if isPacket || err != nil {
- return int(n), addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err)
+ return int(n), addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err)
}
// We need to peek beyond the first message.
@@ -1530,7 +1566,7 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe
// We got some data, so no need to return an error.
err = nil
}
- return int(n), nil, 0, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err)
+ return int(n), nil, 0, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err)
}
var msgLen int
@@ -1543,10 +1579,10 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe
}
if trunc {
- return msgLen, addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err)
+ return msgLen, addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err)
}
- return int(n), addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err)
+ return int(n), addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err)
}
// RecvMsg implements the linux syscall recvmsg(2) for sockets backed by
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index fef5ba0e4..3cd431d4c 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -298,9 +298,6 @@ type Endpoint interface {
//
// This method does not block if there is no data pending. It will also
// either return an error or data, never both.
- //
- // A timestamp (in ns) is optionally returned. A zero value indicates
- // that no timestamp was available.
Read(*FullAddress) (buffer.View, ControlMessages, *Error)
// Write writes data to the endpoint's peer. This method does not block if
@@ -326,9 +323,6 @@ type Endpoint interface {
// Peek reads data without consuming it from the endpoint.
//
// This method does not block if there is no data pending.
- //
- // A timestamp (in ns) is optionally returned. A zero value indicates
- // that no timestamp was available.
Peek([][]byte) (uintptr, ControlMessages, *Error)
// Connect connects the endpoint to its peer. Specifying a NIC is
@@ -449,10 +443,6 @@ type QuickAckOption int
// Only supported on Unix sockets.
type PasscredOption int
-// TimestampOption is used by SetSockOpt/GetSockOpt to specify whether
-// SO_TIMESTAMP socket control messages are enabled.
-type TimestampOption int
-
// TCPInfoOption is used by GetSockOpt to expose TCP statistics.
//
// TODO: Add and populate stat fields.
diff --git a/pkg/tcpip/transport/ping/endpoint.go b/pkg/tcpip/transport/ping/endpoint.go
index 29f6c543d..c8263a512 100644
--- a/pkg/tcpip/transport/ping/endpoint.go
+++ b/pkg/tcpip/transport/ping/endpoint.go
@@ -32,7 +32,6 @@ type pingPacket struct {
senderAddress tcpip.FullAddress
data buffer.VectorisedView `state:".(buffer.VectorisedView)"`
timestamp int64
- hasTimestamp bool
// views is used as buffer for data when its length is large
// enough to store a VectorisedView.
views [8]buffer.View `state:"nosave"`
@@ -67,7 +66,6 @@ type endpoint struct {
rcvBufSizeMax int `state:".(int)"`
rcvBufSize int
rcvClosed bool
- rcvTimestamp bool
// The following fields are protected by the mu mutex.
mu sync.RWMutex `state:"nosave"`
@@ -140,7 +138,6 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess
p := e.rcvList.Front()
e.rcvList.Remove(p)
e.rcvBufSize -= p.data.Size()
- ts := e.rcvTimestamp
e.rcvMu.Unlock()
@@ -148,12 +145,7 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess
*addr = p.senderAddress
}
- if ts && !p.hasTimestamp {
- // Linux uses the current time.
- p.timestamp = e.stack.NowNanoseconds()
- }
-
- return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: ts, Timestamp: p.timestamp}, nil
+ return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: p.timestamp}, nil
}
// prepareForWrite prepares the endpoint for sending data. In particular, it
@@ -313,12 +305,6 @@ func (e *endpoint) Peek([][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error)
// SetSockOpt sets a socket option. Currently not supported.
func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
- switch v := opt.(type) {
- case tcpip.TimestampOption:
- e.rcvMu.Lock()
- e.rcvTimestamp = v != 0
- e.rcvMu.Unlock()
- }
return nil
}
@@ -351,15 +337,6 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
e.rcvMu.Unlock()
return nil
- case *tcpip.TimestampOption:
- e.rcvMu.Lock()
- *o = 0
- if e.rcvTimestamp {
- *o = 1
- }
- e.rcvMu.Unlock()
- return nil
-
case *tcpip.KeepaliveEnabledOption:
*o = 0
return nil
@@ -702,10 +679,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv
e.rcvList.PushBack(pkt)
e.rcvBufSize += vv.Size()
- if e.rcvTimestamp {
- pkt.timestamp = e.stack.NowNanoseconds()
- pkt.hasTimestamp = true
- }
+ pkt.timestamp = e.stack.NowNanoseconds()
e.rcvMu.Unlock()
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index d46bf0ade..fa8f02e46 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -32,7 +32,6 @@ type udpPacket struct {
senderAddress tcpip.FullAddress
data buffer.VectorisedView `state:".(buffer.VectorisedView)"`
timestamp int64
- hasTimestamp bool
// views is used as buffer for data when its length is large
// enough to store a VectorisedView.
views [8]buffer.View `state:"nosave"`
@@ -68,7 +67,6 @@ type endpoint struct {
rcvBufSizeMax int `state:".(int)"`
rcvBufSize int
rcvClosed bool
- rcvTimestamp bool
// The following fields are protected by the mu mutex.
mu sync.RWMutex `state:"nosave"`
@@ -203,7 +201,6 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess
p := e.rcvList.Front()
e.rcvList.Remove(p)
e.rcvBufSize -= p.data.Size()
- ts := e.rcvTimestamp
e.rcvMu.Unlock()
@@ -211,12 +208,7 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess
*addr = p.senderAddress
}
- if ts && !p.hasTimestamp {
- // Linux uses the current time.
- p.timestamp = e.stack.NowNanoseconds()
- }
-
- return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: ts, Timestamp: p.timestamp}, nil
+ return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: p.timestamp}, nil
}
// prepareForWrite prepares the endpoint for sending data. In particular, it
@@ -397,11 +389,6 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
e.v6only = v != 0
- case tcpip.TimestampOption:
- e.rcvMu.Lock()
- e.rcvTimestamp = v != 0
- e.rcvMu.Unlock()
-
case tcpip.MulticastTTLOption:
e.mu.Lock()
e.multicastTTL = uint8(v)
@@ -508,15 +495,6 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
e.rcvMu.Unlock()
return nil
- case *tcpip.TimestampOption:
- e.rcvMu.Lock()
- *o = 0
- if e.rcvTimestamp {
- *o = 1
- }
- e.rcvMu.Unlock()
- return nil
-
case *tcpip.MulticastTTLOption:
e.mu.Lock()
*o = tcpip.MulticastTTLOption(e.multicastTTL)
@@ -909,10 +887,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv
e.rcvList.PushBack(pkt)
e.rcvBufSize += vv.Size()
- if e.rcvTimestamp {
- pkt.timestamp = e.stack.NowNanoseconds()
- pkt.hasTimestamp = true
- }
+ pkt.timestamp = e.stack.NowNanoseconds()
e.rcvMu.Unlock()