diff options
-rw-r--r-- | pkg/sentry/socket/epsocket/epsocket.go | 94 | ||||
-rw-r--r-- | pkg/tcpip/tcpip.go | 10 | ||||
-rw-r--r-- | pkg/tcpip/transport/ping/endpoint.go | 30 | ||||
-rw-r--r-- | pkg/tcpip/transport/udp/endpoint.go | 29 |
4 files changed, 69 insertions, 94 deletions
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go index 5e4a269c6..3a9d1182f 100644 --- a/pkg/sentry/socket/epsocket/epsocket.go +++ b/pkg/sentry/socket/epsocket/epsocket.go @@ -150,11 +150,24 @@ type SocketOperations struct { Endpoint tcpip.Endpoint skType transport.SockType - // readMu protects access to readView, control, and sender. - readMu sync.Mutex `state:"nosave"` + // readMu protects access to the below fields. + readMu sync.Mutex `state:"nosave"` + // readView contains the remaining payload from the last packet. readView buffer.View - readCM tcpip.ControlMessages - sender tcpip.FullAddress + // readCM holds control message information for the last packet read + // from Endpoint. + readCM tcpip.ControlMessages + sender tcpip.FullAddress + // sockOptTimestamp corresponds to SO_TIMESTAMP. When true, timestamps + // of returned messages can be returned via control messages. When + // false, the same timestamp is instead stored and can be read via the + // SIOCGSTAMP ioctl. See socket(7). + sockOptTimestamp bool + // timestampValid indicates whether timestamp has been set. + timestampValid bool + // timestampNS holds the timestamp to use with SIOCGSTAMP. It is only + // valid when timestampValid is true. + timestampNS int64 } // New creates a new endpoint socket. @@ -515,6 +528,24 @@ func (s *SocketOperations) Shutdown(t *kernel.Task, how int) *syserr.Error { // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by // tcpip.Endpoint. func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name, outLen int) (interface{}, *syserr.Error) { + // TODO: Unlike other socket options, SO_TIMESTAMP is + // implemented specifically for epsocket.SocketOperations rather than + // commonEndpoint. commonEndpoint should be extended to support socket + // options where the implementation is not shared, as unix sockets need + // their own support for SO_TIMESTAMP. + if level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP { + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + val := int32(0) + s.readMu.Lock() + defer s.readMu.Unlock() + if s.sockOptTimestamp { + val = 1 + } + return val, nil + } + return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outLen) } @@ -680,18 +711,6 @@ func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family return linux.NsecToTimeval(s.RecvTimeout()), nil - case linux.SO_TIMESTAMP: - if outLen < sizeOfInt32 { - return nil, syserr.ErrInvalidArgument - } - - var v tcpip.TimestampOption - if err := ep.GetSockOpt(&v); err != nil { - return nil, syserr.TranslateNetstackError(err) - } - - return int32(v), nil - case linux.SO_OOBINLINE: if outLen < sizeOfInt32 { return nil, syserr.ErrInvalidArgument @@ -854,6 +873,21 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfac // SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by // tcpip.Endpoint. func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error { + // TODO: Unlike other socket options, SO_TIMESTAMP is + // implemented specifically for epsocket.SocketOperations rather than + // commonEndpoint. commonEndpoint should be extended to support socket + // options where the implementation is not shared, as unix sockets need + // their own support for SO_TIMESTAMP. + if level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP { + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + s.readMu.Lock() + defer s.readMu.Unlock() + s.sockOptTimestamp = usermem.ByteOrder.Uint32(optVal) != 0 + return nil + } + return SetSockOpt(t, s, s.Endpoint, level, name, optVal) } @@ -962,14 +996,6 @@ func setSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name i s.SetRecvTimeout(v.ToNsecCapped()) return nil - case linux.SO_TIMESTAMP: - if len(optVal) < sizeOfInt32 { - return syserr.ErrInvalidArgument - } - - v := usermem.ByteOrder.Uint32(optVal) - return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TimestampOption(v))) - default: socket.SetSockOptEmitUnimplementedEvent(t, name) } @@ -1436,6 +1462,11 @@ func (s *SocketOperations) coalescingRead(ctx context.Context, dst usermem.IOSeq } } else { n, e = dst.CopyOut(ctx, s.readView) + // Set the control message, even if 0 bytes were read. + if e == nil && s.readCM.HasTimestamp && s.sockOptTimestamp { + s.timestampNS = s.readCM.Timestamp + s.timestampValid = true + } } copied += n s.readView.TrimFront(n) @@ -1499,6 +1530,11 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe } n, err := dst.CopyOut(ctx, s.readView) + // Set the control message, even if 0 bytes were read. + if err == nil && s.readCM.HasTimestamp && s.sockOptTimestamp { + s.timestampNS = s.readCM.Timestamp + s.timestampValid = true + } var addr interface{} var addrLen uint32 if isPacket && senderRequested { @@ -1508,11 +1544,11 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe if peek { if l := len(s.readView); trunc && l > n { // isPacket must be true. - return l, addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) + return l, addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err) } if isPacket || err != nil { - return int(n), addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) + return int(n), addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err) } // We need to peek beyond the first message. @@ -1530,7 +1566,7 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe // We got some data, so no need to return an error. err = nil } - return int(n), nil, 0, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) + return int(n), nil, 0, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err) } var msgLen int @@ -1543,10 +1579,10 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe } if trunc { - return msgLen, addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) + return msgLen, addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err) } - return int(n), addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) + return int(n), addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err) } // RecvMsg implements the linux syscall recvmsg(2) for sockets backed by diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index fef5ba0e4..3cd431d4c 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -298,9 +298,6 @@ type Endpoint interface { // // This method does not block if there is no data pending. It will also // either return an error or data, never both. - // - // A timestamp (in ns) is optionally returned. A zero value indicates - // that no timestamp was available. Read(*FullAddress) (buffer.View, ControlMessages, *Error) // Write writes data to the endpoint's peer. This method does not block if @@ -326,9 +323,6 @@ type Endpoint interface { // Peek reads data without consuming it from the endpoint. // // This method does not block if there is no data pending. - // - // A timestamp (in ns) is optionally returned. A zero value indicates - // that no timestamp was available. Peek([][]byte) (uintptr, ControlMessages, *Error) // Connect connects the endpoint to its peer. Specifying a NIC is @@ -449,10 +443,6 @@ type QuickAckOption int // Only supported on Unix sockets. type PasscredOption int -// TimestampOption is used by SetSockOpt/GetSockOpt to specify whether -// SO_TIMESTAMP socket control messages are enabled. -type TimestampOption int - // TCPInfoOption is used by GetSockOpt to expose TCP statistics. // // TODO: Add and populate stat fields. diff --git a/pkg/tcpip/transport/ping/endpoint.go b/pkg/tcpip/transport/ping/endpoint.go index 29f6c543d..c8263a512 100644 --- a/pkg/tcpip/transport/ping/endpoint.go +++ b/pkg/tcpip/transport/ping/endpoint.go @@ -32,7 +32,6 @@ type pingPacket struct { senderAddress tcpip.FullAddress data buffer.VectorisedView `state:".(buffer.VectorisedView)"` timestamp int64 - hasTimestamp bool // views is used as buffer for data when its length is large // enough to store a VectorisedView. views [8]buffer.View `state:"nosave"` @@ -67,7 +66,6 @@ type endpoint struct { rcvBufSizeMax int `state:".(int)"` rcvBufSize int rcvClosed bool - rcvTimestamp bool // The following fields are protected by the mu mutex. mu sync.RWMutex `state:"nosave"` @@ -140,7 +138,6 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess p := e.rcvList.Front() e.rcvList.Remove(p) e.rcvBufSize -= p.data.Size() - ts := e.rcvTimestamp e.rcvMu.Unlock() @@ -148,12 +145,7 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess *addr = p.senderAddress } - if ts && !p.hasTimestamp { - // Linux uses the current time. - p.timestamp = e.stack.NowNanoseconds() - } - - return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: ts, Timestamp: p.timestamp}, nil + return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: p.timestamp}, nil } // prepareForWrite prepares the endpoint for sending data. In particular, it @@ -313,12 +305,6 @@ func (e *endpoint) Peek([][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) // SetSockOpt sets a socket option. Currently not supported. func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { - switch v := opt.(type) { - case tcpip.TimestampOption: - e.rcvMu.Lock() - e.rcvTimestamp = v != 0 - e.rcvMu.Unlock() - } return nil } @@ -351,15 +337,6 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { e.rcvMu.Unlock() return nil - case *tcpip.TimestampOption: - e.rcvMu.Lock() - *o = 0 - if e.rcvTimestamp { - *o = 1 - } - e.rcvMu.Unlock() - return nil - case *tcpip.KeepaliveEnabledOption: *o = 0 return nil @@ -702,10 +679,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv e.rcvList.PushBack(pkt) e.rcvBufSize += vv.Size() - if e.rcvTimestamp { - pkt.timestamp = e.stack.NowNanoseconds() - pkt.hasTimestamp = true - } + pkt.timestamp = e.stack.NowNanoseconds() e.rcvMu.Unlock() diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index d46bf0ade..fa8f02e46 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -32,7 +32,6 @@ type udpPacket struct { senderAddress tcpip.FullAddress data buffer.VectorisedView `state:".(buffer.VectorisedView)"` timestamp int64 - hasTimestamp bool // views is used as buffer for data when its length is large // enough to store a VectorisedView. views [8]buffer.View `state:"nosave"` @@ -68,7 +67,6 @@ type endpoint struct { rcvBufSizeMax int `state:".(int)"` rcvBufSize int rcvClosed bool - rcvTimestamp bool // The following fields are protected by the mu mutex. mu sync.RWMutex `state:"nosave"` @@ -203,7 +201,6 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess p := e.rcvList.Front() e.rcvList.Remove(p) e.rcvBufSize -= p.data.Size() - ts := e.rcvTimestamp e.rcvMu.Unlock() @@ -211,12 +208,7 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess *addr = p.senderAddress } - if ts && !p.hasTimestamp { - // Linux uses the current time. - p.timestamp = e.stack.NowNanoseconds() - } - - return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: ts, Timestamp: p.timestamp}, nil + return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: p.timestamp}, nil } // prepareForWrite prepares the endpoint for sending data. In particular, it @@ -397,11 +389,6 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { e.v6only = v != 0 - case tcpip.TimestampOption: - e.rcvMu.Lock() - e.rcvTimestamp = v != 0 - e.rcvMu.Unlock() - case tcpip.MulticastTTLOption: e.mu.Lock() e.multicastTTL = uint8(v) @@ -508,15 +495,6 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { e.rcvMu.Unlock() return nil - case *tcpip.TimestampOption: - e.rcvMu.Lock() - *o = 0 - if e.rcvTimestamp { - *o = 1 - } - e.rcvMu.Unlock() - return nil - case *tcpip.MulticastTTLOption: e.mu.Lock() *o = tcpip.MulticastTTLOption(e.multicastTTL) @@ -909,10 +887,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv e.rcvList.PushBack(pkt) e.rcvBufSize += vv.Size() - if e.rcvTimestamp { - pkt.timestamp = e.stack.NowNanoseconds() - pkt.hasTimestamp = true - } + pkt.timestamp = e.stack.NowNanoseconds() e.rcvMu.Unlock() |