diff options
author | Kevin Krakauer <krakauer@google.com> | 2019-02-15 11:17:51 -0800 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2019-02-15 11:18:44 -0800 |
commit | a9cb3dcd9df373fb7a531476bf1da69fc9189e3a (patch) | |
tree | 0c9ffb606242a4f26d3f310f23711219a84316fd | |
parent | c5f10af2c8a6be37f38ccbb1e4abb16113ec8fbd (diff) |
Move SO_TIMESTAMP from different transport endpoints to epsocket.
SO_TIMESTAMP is currently implemented separately in the ping and UDP endpoints
(and would need to be added again for TCP); implementing it once in epsocket is
simpler. This will also make SIOCGSTAMP easier to implement.
PiperOrigin-RevId: 234179300
Change-Id: Ib5ea0b1261dc218c1a8b15a65775de0050fe3230
-rw-r--r-- | pkg/sentry/socket/epsocket/epsocket.go | 94 | ||||
-rw-r--r-- | pkg/tcpip/tcpip.go | 10 | ||||
-rw-r--r-- | pkg/tcpip/transport/ping/endpoint.go | 30 | ||||
-rw-r--r-- | pkg/tcpip/transport/udp/endpoint.go | 29 |
4 files changed, 69 insertions, 94 deletions
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go index 5e4a269c6..3a9d1182f 100644 --- a/pkg/sentry/socket/epsocket/epsocket.go +++ b/pkg/sentry/socket/epsocket/epsocket.go @@ -150,11 +150,24 @@ type SocketOperations struct { Endpoint tcpip.Endpoint skType transport.SockType - // readMu protects access to readView, control, and sender. - readMu sync.Mutex `state:"nosave"` + // readMu protects access to the below fields. + readMu sync.Mutex `state:"nosave"` + // readView contains the remaining payload from the last packet. readView buffer.View - readCM tcpip.ControlMessages - sender tcpip.FullAddress + // readCM holds control message information for the last packet read + // from Endpoint. + readCM tcpip.ControlMessages + sender tcpip.FullAddress + // sockOptTimestamp corresponds to SO_TIMESTAMP. When true, timestamps + // of returned messages can be returned via control messages. When + // false, the same timestamp is instead stored and can be read via the + // SIOCGSTAMP ioctl. See socket(7). + sockOptTimestamp bool + // timestampValid indicates whether timestamp has been set. + timestampValid bool + // timestampNS holds the timestamp to use with SIOCGSTAMP. It is only + // valid when timestampValid is true. + timestampNS int64 } // New creates a new endpoint socket. @@ -515,6 +528,24 @@ func (s *SocketOperations) Shutdown(t *kernel.Task, how int) *syserr.Error { // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by // tcpip.Endpoint. func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name, outLen int) (interface{}, *syserr.Error) { + // TODO: Unlike other socket options, SO_TIMESTAMP is + // implemented specifically for epsocket.SocketOperations rather than + // commonEndpoint. commonEndpoint should be extended to support socket + // options where the implementation is not shared, as unix sockets need + // their own support for SO_TIMESTAMP. 
+ if level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP { + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + val := int32(0) + s.readMu.Lock() + defer s.readMu.Unlock() + if s.sockOptTimestamp { + val = 1 + } + return val, nil + } + return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outLen) } @@ -680,18 +711,6 @@ func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family return linux.NsecToTimeval(s.RecvTimeout()), nil - case linux.SO_TIMESTAMP: - if outLen < sizeOfInt32 { - return nil, syserr.ErrInvalidArgument - } - - var v tcpip.TimestampOption - if err := ep.GetSockOpt(&v); err != nil { - return nil, syserr.TranslateNetstackError(err) - } - - return int32(v), nil - case linux.SO_OOBINLINE: if outLen < sizeOfInt32 { return nil, syserr.ErrInvalidArgument @@ -854,6 +873,21 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfac // SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by // tcpip.Endpoint. func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error { + // TODO: Unlike other socket options, SO_TIMESTAMP is + // implemented specifically for epsocket.SocketOperations rather than + // commonEndpoint. commonEndpoint should be extended to support socket + // options where the implementation is not shared, as unix sockets need + // their own support for SO_TIMESTAMP. 
+ if level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP { + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + s.readMu.Lock() + defer s.readMu.Unlock() + s.sockOptTimestamp = usermem.ByteOrder.Uint32(optVal) != 0 + return nil + } + return SetSockOpt(t, s, s.Endpoint, level, name, optVal) } @@ -962,14 +996,6 @@ func setSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name i s.SetRecvTimeout(v.ToNsecCapped()) return nil - case linux.SO_TIMESTAMP: - if len(optVal) < sizeOfInt32 { - return syserr.ErrInvalidArgument - } - - v := usermem.ByteOrder.Uint32(optVal) - return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TimestampOption(v))) - default: socket.SetSockOptEmitUnimplementedEvent(t, name) } @@ -1436,6 +1462,11 @@ func (s *SocketOperations) coalescingRead(ctx context.Context, dst usermem.IOSeq } } else { n, e = dst.CopyOut(ctx, s.readView) + // Set the control message, even if 0 bytes were read. + if e == nil && s.readCM.HasTimestamp && s.sockOptTimestamp { + s.timestampNS = s.readCM.Timestamp + s.timestampValid = true + } } copied += n s.readView.TrimFront(n) @@ -1499,6 +1530,11 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe } n, err := dst.CopyOut(ctx, s.readView) + // Set the control message, even if 0 bytes were read. + if err == nil && s.readCM.HasTimestamp && s.sockOptTimestamp { + s.timestampNS = s.readCM.Timestamp + s.timestampValid = true + } var addr interface{} var addrLen uint32 if isPacket && senderRequested { @@ -1508,11 +1544,11 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe if peek { if l := len(s.readView); trunc && l > n { // isPacket must be true. 
- return l, addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) + return l, addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err) } if isPacket || err != nil { - return int(n), addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) + return int(n), addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err) } // We need to peek beyond the first message. @@ -1530,7 +1566,7 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe // We got some data, so no need to return an error. err = nil } - return int(n), nil, 0, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) + return int(n), nil, 0, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err) } var msgLen int @@ -1543,10 +1579,10 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe } if trunc { - return msgLen, addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) + return msgLen, addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err) } - return int(n), addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) + return int(n), addr, addrLen, socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.timestampValid, Timestamp: s.timestampNS}}, syserr.FromError(err) } // RecvMsg implements the linux syscall recvmsg(2) for sockets backed by diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index fef5ba0e4..3cd431d4c 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -298,9 +298,6 @@ type Endpoint interface { // // This method does not block if there is no data pending. 
It will also // either return an error or data, never both. - // - // A timestamp (in ns) is optionally returned. A zero value indicates - // that no timestamp was available. Read(*FullAddress) (buffer.View, ControlMessages, *Error) // Write writes data to the endpoint's peer. This method does not block if @@ -326,9 +323,6 @@ type Endpoint interface { // Peek reads data without consuming it from the endpoint. // // This method does not block if there is no data pending. - // - // A timestamp (in ns) is optionally returned. A zero value indicates - // that no timestamp was available. Peek([][]byte) (uintptr, ControlMessages, *Error) // Connect connects the endpoint to its peer. Specifying a NIC is @@ -449,10 +443,6 @@ type QuickAckOption int // Only supported on Unix sockets. type PasscredOption int -// TimestampOption is used by SetSockOpt/GetSockOpt to specify whether -// SO_TIMESTAMP socket control messages are enabled. -type TimestampOption int - // TCPInfoOption is used by GetSockOpt to expose TCP statistics. // // TODO: Add and populate stat fields. diff --git a/pkg/tcpip/transport/ping/endpoint.go b/pkg/tcpip/transport/ping/endpoint.go index 29f6c543d..c8263a512 100644 --- a/pkg/tcpip/transport/ping/endpoint.go +++ b/pkg/tcpip/transport/ping/endpoint.go @@ -32,7 +32,6 @@ type pingPacket struct { senderAddress tcpip.FullAddress data buffer.VectorisedView `state:".(buffer.VectorisedView)"` timestamp int64 - hasTimestamp bool // views is used as buffer for data when its length is large // enough to store a VectorisedView. views [8]buffer.View `state:"nosave"` @@ -67,7 +66,6 @@ type endpoint struct { rcvBufSizeMax int `state:".(int)"` rcvBufSize int rcvClosed bool - rcvTimestamp bool // The following fields are protected by the mu mutex. 
mu sync.RWMutex `state:"nosave"` @@ -140,7 +138,6 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess p := e.rcvList.Front() e.rcvList.Remove(p) e.rcvBufSize -= p.data.Size() - ts := e.rcvTimestamp e.rcvMu.Unlock() @@ -148,12 +145,7 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess *addr = p.senderAddress } - if ts && !p.hasTimestamp { - // Linux uses the current time. - p.timestamp = e.stack.NowNanoseconds() - } - - return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: ts, Timestamp: p.timestamp}, nil + return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: p.timestamp}, nil } // prepareForWrite prepares the endpoint for sending data. In particular, it @@ -313,12 +305,6 @@ func (e *endpoint) Peek([][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) // SetSockOpt sets a socket option. Currently not supported. func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { - switch v := opt.(type) { - case tcpip.TimestampOption: - e.rcvMu.Lock() - e.rcvTimestamp = v != 0 - e.rcvMu.Unlock() - } return nil } @@ -351,15 +337,6 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { e.rcvMu.Unlock() return nil - case *tcpip.TimestampOption: - e.rcvMu.Lock() - *o = 0 - if e.rcvTimestamp { - *o = 1 - } - e.rcvMu.Unlock() - return nil - case *tcpip.KeepaliveEnabledOption: *o = 0 return nil @@ -702,10 +679,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv e.rcvList.PushBack(pkt) e.rcvBufSize += vv.Size() - if e.rcvTimestamp { - pkt.timestamp = e.stack.NowNanoseconds() - pkt.hasTimestamp = true - } + pkt.timestamp = e.stack.NowNanoseconds() e.rcvMu.Unlock() diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index d46bf0ade..fa8f02e46 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -32,7 +32,6 @@ type udpPacket struct { senderAddress tcpip.FullAddress data 
buffer.VectorisedView `state:".(buffer.VectorisedView)"` timestamp int64 - hasTimestamp bool // views is used as buffer for data when its length is large // enough to store a VectorisedView. views [8]buffer.View `state:"nosave"` @@ -68,7 +67,6 @@ type endpoint struct { rcvBufSizeMax int `state:".(int)"` rcvBufSize int rcvClosed bool - rcvTimestamp bool // The following fields are protected by the mu mutex. mu sync.RWMutex `state:"nosave"` @@ -203,7 +201,6 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess p := e.rcvList.Front() e.rcvList.Remove(p) e.rcvBufSize -= p.data.Size() - ts := e.rcvTimestamp e.rcvMu.Unlock() @@ -211,12 +208,7 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess *addr = p.senderAddress } - if ts && !p.hasTimestamp { - // Linux uses the current time. - p.timestamp = e.stack.NowNanoseconds() - } - - return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: ts, Timestamp: p.timestamp}, nil + return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: p.timestamp}, nil } // prepareForWrite prepares the endpoint for sending data. 
In particular, it @@ -397,11 +389,6 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { e.v6only = v != 0 - case tcpip.TimestampOption: - e.rcvMu.Lock() - e.rcvTimestamp = v != 0 - e.rcvMu.Unlock() - case tcpip.MulticastTTLOption: e.mu.Lock() e.multicastTTL = uint8(v) @@ -508,15 +495,6 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { e.rcvMu.Unlock() return nil - case *tcpip.TimestampOption: - e.rcvMu.Lock() - *o = 0 - if e.rcvTimestamp { - *o = 1 - } - e.rcvMu.Unlock() - return nil - case *tcpip.MulticastTTLOption: e.mu.Lock() *o = tcpip.MulticastTTLOption(e.multicastTTL) @@ -909,10 +887,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv e.rcvList.PushBack(pkt) e.rcvBufSize += vv.Size() - if e.rcvTimestamp { - pkt.timestamp = e.stack.NowNanoseconds() - pkt.hasTimestamp = true - } + pkt.timestamp = e.stack.NowNanoseconds() e.rcvMu.Unlock() |