diff options
32 files changed, 345 insertions, 150 deletions
diff --git a/pkg/dhcp/client.go b/pkg/dhcp/client.go index 9a4fd7ae4..37deb69ff 100644 --- a/pkg/dhcp/client.go +++ b/pkg/dhcp/client.go @@ -162,7 +162,7 @@ func (c *Client) Request(ctx context.Context, requestedAddr tcpip.Address) error // DHCPOFFER for { var addr tcpip.FullAddress - v, err := epin.Read(&addr) + v, _, err := epin.Read(&addr) if err == tcpip.ErrWouldBlock { select { case <-ch: @@ -216,7 +216,7 @@ func (c *Client) Request(ctx context.Context, requestedAddr tcpip.Address) error // DHCPACK for { var addr tcpip.FullAddress - v, err := epin.Read(&addr) + v, _, err := epin.Read(&addr) if err == tcpip.ErrWouldBlock { select { case <-ch: diff --git a/pkg/dhcp/dhcp_test.go b/pkg/dhcp/dhcp_test.go index d56b93997..ed884fcb6 100644 --- a/pkg/dhcp/dhcp_test.go +++ b/pkg/dhcp/dhcp_test.go @@ -36,7 +36,7 @@ func TestDHCP(t *testing.T) { } }() - s := stack.New([]string{ipv4.ProtocolName}, []string{udp.ProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName}, []string{udp.ProtocolName}) const nicid tcpip.NICID = 1 if err := s.CreateNIC(nicid, id); err != nil { diff --git a/pkg/dhcp/server.go b/pkg/dhcp/server.go index d132d90b4..8816203a8 100644 --- a/pkg/dhcp/server.go +++ b/pkg/dhcp/server.go @@ -104,7 +104,7 @@ func (s *Server) reader(ctx context.Context) { for { var addr tcpip.FullAddress - v, err := s.ep.Read(&addr) + v, _, err := s.ep.Read(&addr) if err == tcpip.ErrWouldBlock { select { case <-ch: diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go index 80c46dcfa..9b73c5173 100644 --- a/pkg/sentry/fs/host/socket_test.go +++ b/pkg/sentry/fs/host/socket_test.go @@ -142,7 +142,7 @@ func TestSocketSendMsgLen0(t *testing.T) { defer sfile.DecRef() s := sfile.FileOperations.(socket.Socket) - n, terr := s.SendMsg(nil, usermem.BytesIOSequence(nil), []byte{}, 0, unix.ControlMessages{}) + n, terr := s.SendMsg(nil, usermem.BytesIOSequence(nil), []byte{}, 0, socket.ControlMessages{}) if n != 0 { t.Fatalf("socket sendmsg() failed: %v wrote: %d", terr, n) } diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 0932965e0..25c8dd885 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -887,6 +887,15 @@ func (k *Kernel) SetExitError(err error) { } } +// NowNanoseconds implements tcpip.Clock.NowNanoseconds. +func (k *Kernel) NowNanoseconds() int64 { + now, err := k.timekeeper.GetTime(sentrytime.Realtime) + if err != nil { + panic("Kernel.NowNanoseconds: " + err.Error()) + } + return now +} + // SupervisorContext returns a Context with maximum privileges in k. It should // only be used by goroutines outside the control of the emulated kernel // defined by e. diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD index 87e32df37..5500a676e 100644 --- a/pkg/sentry/socket/BUILD +++ b/pkg/sentry/socket/BUILD @@ -32,6 +32,7 @@ go_library( "//pkg/sentry/usermem", "//pkg/state", "//pkg/syserr", + "//pkg/tcpip", "//pkg/tcpip/transport/unix", ], ) diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index cb34cbc85..17ecdd11c 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -208,6 +208,31 @@ func putCmsg(buf []byte, msgType uint32, align uint, data []int32) []byte { return alignSlice(buf, align) } +func putCmsgStruct(buf []byte, msgType uint32, align uint, data interface{}) []byte { + if cap(buf)-len(buf) < linux.SizeOfControlMessageHeader { + return buf + } + ob := buf + + buf = putUint64(buf, uint64(linux.SizeOfControlMessageHeader)) + buf = putUint32(buf, linux.SOL_SOCKET) + buf = putUint32(buf, msgType) + + hdrBuf := buf + + buf = binary.Marshal(buf, usermem.ByteOrder, data) + + // Check if we went over. + if cap(buf) != cap(ob) { + return hdrBuf + } + + // Fix up length. + putUint64(ob, uint64(len(buf)-len(ob))) + + return alignSlice(buf, align) +} + // Credentials implements SCMCredentials.Credentials. func (c *scmCredentials) Credentials(t *kernel.Task) (kernel.ThreadID, auth.UID, auth.GID) { // "When a process's user and group IDs are passed over a UNIX domain @@ -261,6 +286,16 @@ func alignSlice(buf []byte, align uint) []byte { return buf[:aligned] } +// PackTimestamp packs a SO_TIMESTAMP socket control message. +func PackTimestamp(t *kernel.Task, timestamp int64, buf []byte) []byte { + return putCmsgStruct( + buf, + linux.SO_TIMESTAMP, + t.Arch().Width(), + linux.NsecToTimeval(timestamp), + ) +} + // Parse parses a raw socket control message into portable objects. func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (unix.ControlMessages, error) { var ( diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go index 3fc3ea58f..5701ecfac 100644 --- a/pkg/sentry/socket/epsocket/epsocket.go +++ b/pkg/sentry/socket/epsocket/epsocket.go @@ -109,6 +109,7 @@ type SocketOperations struct { // readMu protects access to readView, control, and sender. readMu sync.Mutex `state:"nosave"` readView buffer.View + readCM tcpip.ControlMessages sender tcpip.FullAddress } @@ -210,12 +211,13 @@ func (s *SocketOperations) fetchReadView() *syserr.Error { s.readView = nil s.sender = tcpip.FullAddress{} - v, err := s.Endpoint.Read(&s.sender) + v, cms, err := s.Endpoint.Read(&s.sender) if err != nil { return syserr.TranslateNetstackError(err) } s.readView = v + s.readCM = cms return nil } @@ -230,7 +232,7 @@ func (s *SocketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS if dst.NumBytes() == 0 { return 0, nil } - n, _, _, err := s.nonBlockingRead(ctx, dst, false, false, false) + n, _, _, _, err := s.nonBlockingRead(ctx, dst, false, false, false) if err == syserr.ErrWouldBlock { return int64(n), syserror.ErrWouldBlock } @@ -552,6 +554,18 @@ func GetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, } return linux.NsecToTimeval(s.RecvTimeout()), nil + + case linux.SO_TIMESTAMP: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + var v tcpip.TimestampOption + if err := ep.GetSockOpt(&v); err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + return int32(v), nil } case syscall.SOL_TCP: @@ -659,6 +673,14 @@ func SetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, level int, n binary.Unmarshal(optVal[:linux.SizeOfTimeval], usermem.ByteOrder, &v) s.SetRecvTimeout(v.ToNsecCapped()) return nil + + case linux.SO_TIMESTAMP: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + + v := usermem.ByteOrder.Uint32(optVal) + return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TimestampOption(v))) } case syscall.SOL_TCP: @@ -823,7 +845,9 @@ func (s *SocketOperations) coalescingRead(ctx context.Context, dst usermem.IOSeq } // nonBlockingRead issues a non-blocking read. -func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSequence, peek, trunc, senderRequested bool) (int, interface{}, uint32, *syserr.Error) { +// +// TODO: Support timestamps for stream sockets. +func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSequence, peek, trunc, senderRequested bool) (int, interface{}, uint32, socket.ControlMessages, *syserr.Error) { isPacket := s.isPacketBased() // Fast path for regular reads from stream (e.g., TCP) endpoints. Note @@ -839,14 +863,14 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe s.readMu.Lock() n, err := s.coalescingRead(ctx, dst, trunc) s.readMu.Unlock() - return n, nil, 0, err + return n, nil, 0, socket.ControlMessages{}, err } s.readMu.Lock() defer s.readMu.Unlock() if err := s.fetchReadView(); err != nil { - return 0, nil, 0, err + return 0, nil, 0, socket.ControlMessages{}, err } if !isPacket && peek && trunc { @@ -854,14 +878,14 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe // amount that could be read. var rql tcpip.ReceiveQueueSizeOption if err := s.Endpoint.GetSockOpt(&rql); err != nil { - return 0, nil, 0, syserr.TranslateNetstackError(err) + return 0, nil, 0, socket.ControlMessages{}, syserr.TranslateNetstackError(err) } available := len(s.readView) + int(rql) bufLen := int(dst.NumBytes()) if available < bufLen { - return available, nil, 0, nil + return available, nil, 0, socket.ControlMessages{}, nil } - return bufLen, nil, 0, nil + return bufLen, nil, 0, socket.ControlMessages{}, nil } n, err := dst.CopyOut(ctx, s.readView) @@ -874,17 +898,18 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe if peek { if l := len(s.readView); trunc && l > n { // isPacket must be true. - return l, addr, addrLen, syserr.FromError(err) + return l, addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) } if isPacket || err != nil { - return int(n), addr, addrLen, syserr.FromError(err) + return int(n), addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) } // We need to peek beyond the first message. dst = dst.DropFirst(n) num, err := dst.CopyOutFrom(ctx, safemem.FromVecReaderFunc{func(dsts [][]byte) (int64, error) { - n, err := s.Endpoint.Peek(dsts) + n, _, err := s.Endpoint.Peek(dsts) + // TODO: Handle peek timestamp. if err != nil { return int64(n), syserr.TranslateNetstackError(err).ToError() } @@ -895,7 +920,7 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe // We got some data, so no need to return an error. err = nil } - return int(n), nil, 0, syserr.FromError(err) + return int(n), nil, 0, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) } var msgLen int @@ -908,15 +933,15 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe } if trunc { - return msgLen, addr, addrLen, syserr.FromError(err) + return msgLen, addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) } - return int(n), addr, addrLen, syserr.FromError(err) + return int(n), addr, addrLen, socket.ControlMessages{IP: s.readCM}, syserr.FromError(err) } // RecvMsg implements the linux syscall recvmsg(2) for sockets backed by // tcpip.Endpoint. -func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, senderAddr interface{}, senderAddrLen uint32, controlMessages unix.ControlMessages, err *syserr.Error) { +func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, senderAddr interface{}, senderAddrLen uint32, controlMessages socket.ControlMessages, err *syserr.Error) { trunc := flags&linux.MSG_TRUNC != 0 peek := flags&linux.MSG_PEEK != 0 @@ -924,7 +949,7 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags // Stream sockets ignore the sender address. senderRequested = false } - n, senderAddr, senderAddrLen, err = s.nonBlockingRead(t, dst, peek, trunc, senderRequested) + n, senderAddr, senderAddrLen, controlMessages, err = s.nonBlockingRead(t, dst, peek, trunc, senderRequested) if err != syserr.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 { return } @@ -936,25 +961,25 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags defer s.EventUnregister(&e) for { - n, senderAddr, senderAddrLen, err = s.nonBlockingRead(t, dst, peek, trunc, senderRequested) + n, senderAddr, senderAddrLen, controlMessages, err = s.nonBlockingRead(t, dst, peek, trunc, senderRequested) if err != syserr.ErrWouldBlock { return } if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { if err == syserror.ETIMEDOUT { - return 0, nil, 0, unix.ControlMessages{}, syserr.ErrTryAgain + return 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain } - return 0, nil, 0, unix.ControlMessages{}, syserr.FromError(err) + return 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err) } } } // SendMsg implements the linux syscall sendmsg(2) for sockets backed by // tcpip.Endpoint. -func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages unix.ControlMessages) (int, *syserr.Error) { - // Reject control messages. - if !controlMessages.Empty() { +func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages socket.ControlMessages) (int, *syserr.Error) { + // Reject Unix control messages. + if !controlMessages.Unix.Empty() { return 0, syserr.ErrInvalidArgument } diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index defa3db2c..02fad1c60 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -57,6 +57,8 @@ type socketOperations struct { queue waiter.Queue } +var _ = socket.Socket(&socketOperations{}) + func newSocketFile(ctx context.Context, fd int, nonblock bool) (*fs.File, *syserr.Error) { s := &socketOperations{fd: fd} if err := fdnotifier.AddFD(int32(fd), &s.queue); err != nil { @@ -339,14 +341,14 @@ func (s *socketOperations) SetSockOpt(t *kernel.Task, level int, name int, opt [ } // RecvMsg implements socket.Socket.RecvMsg. -func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, interface{}, uint32, unix.ControlMessages, *syserr.Error) { +func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, interface{}, uint32, socket.ControlMessages, *syserr.Error) { // Whitelist flags. // // FIXME: We can't support MSG_ERRQUEUE because it uses ancillary // messages that netstack/tcpip/transport/unix doesn't understand. Kill the // Socket interface's dependence on netstack. if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_PEEK|syscall.MSG_TRUNC) != 0 { - return 0, nil, 0, unix.ControlMessages{}, syserr.ErrInvalidArgument + return 0, nil, 0, socket.ControlMessages{}, syserr.ErrInvalidArgument } var senderAddr []byte @@ -411,11 +413,11 @@ func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags } } - return int(n), senderAddr, uint32(len(senderAddr)), unix.ControlMessages{}, syserr.FromError(err) + return int(n), senderAddr, uint32(len(senderAddr)), socket.ControlMessages{}, syserr.FromError(err) } // SendMsg implements socket.Socket.SendMsg. -func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages unix.ControlMessages) (int, *syserr.Error) { +func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages socket.ControlMessages) (int, *syserr.Error) { // Whitelist flags. if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_EOR|syscall.MSG_FASTOPEN|syscall.MSG_MORE|syscall.MSG_NOSIGNAL) != 0 { return 0, syserr.ErrInvalidArgument diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index 2d0e59ceb..0b8f528d0 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -305,7 +305,7 @@ func (s *Socket) GetPeerName(t *kernel.Task) (interface{}, uint32, *syserr.Error } // RecvMsg implements socket.Socket.RecvMsg. -func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, interface{}, uint32, unix.ControlMessages, *syserr.Error) { +func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, interface{}, uint32, socket.ControlMessages, *syserr.Error) { from := linux.SockAddrNetlink{ Family: linux.AF_NETLINK, PortID: 0, @@ -323,7 +323,7 @@ func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, have if trunc { n = int64(r.MsgSize) } - return int(n), from, fromLen, unix.ControlMessages{}, syserr.FromError(err) + return int(n), from, fromLen, socket.ControlMessages{}, syserr.FromError(err) } // We'll have to block. Register for notification and keep trying to @@ -337,14 +337,14 @@ func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, have if trunc { n = int64(r.MsgSize) } - return int(n), from, fromLen, unix.ControlMessages{}, syserr.FromError(err) + return int(n), from, fromLen, socket.ControlMessages{}, syserr.FromError(err) } if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { if err == syserror.ETIMEDOUT { - return 0, nil, 0, unix.ControlMessages{}, syserr.ErrTryAgain + return 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain } - return 0, nil, 0, unix.ControlMessages{}, syserr.FromError(err) + return 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err) } } } @@ -459,7 +459,7 @@ func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error } // sendMsg is the core of message send, used for SendMsg and Write. -func (s *Socket) sendMsg(ctx context.Context, src usermem.IOSequence, to []byte, flags int, controlMessages unix.ControlMessages) (int, *syserr.Error) { +func (s *Socket) sendMsg(ctx context.Context, src usermem.IOSequence, to []byte, flags int, controlMessages socket.ControlMessages) (int, *syserr.Error) { dstPort := int32(0) if len(to) != 0 { @@ -506,12 +506,12 @@ func (s *Socket) sendMsg(ctx context.Context, src usermem.IOSequence, to []byte, } // SendMsg implements socket.Socket.SendMsg. -func (s *Socket) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages unix.ControlMessages) (int, *syserr.Error) { +func (s *Socket) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages socket.ControlMessages) (int, *syserr.Error) { return s.sendMsg(t, src, to, flags, controlMessages) } // Write implements fs.FileOperations.Write. func (s *Socket) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) { - n, err := s.sendMsg(ctx, src, nil, 0, unix.ControlMessages{}) + n, err := s.sendMsg(ctx, src, nil, 0, socket.ControlMessages{}) return int64(n), err.ToError() } diff --git a/pkg/sentry/socket/rpcinet/socket.go b/pkg/sentry/socket/rpcinet/socket.go index 574d99ba5..15047df01 100644 --- a/pkg/sentry/socket/rpcinet/socket.go +++ b/pkg/sentry/socket/rpcinet/socket.go @@ -402,7 +402,7 @@ func rpcRecvMsg(t *kernel.Task, req *pb.SyscallRequest_Recvmsg) (*pb.RecvmsgResp } // RecvMsg implements socket.Socket.RecvMsg. -func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, interface{}, uint32, unix.ControlMessages, *syserr.Error) { +func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, interface{}, uint32, socket.ControlMessages, *syserr.Error) { req := &pb.SyscallRequest_Recvmsg{&pb.RecvmsgRequest{ Fd: s.fd, Length: uint32(dst.NumBytes()), @@ -414,10 +414,10 @@ func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags res, err := rpcRecvMsg(t, req) if err == nil { n, e := dst.CopyOut(t, res.Data) - return int(n), res.Address.GetAddress(), res.Address.GetLength(), unix.ControlMessages{}, syserr.FromError(e) + return int(n), res.Address.GetAddress(), res.Address.GetLength(), socket.ControlMessages{}, syserr.FromError(e) } if err != syserr.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 { - return 0, nil, 0, unix.ControlMessages{}, err + return 0, nil, 0, socket.ControlMessages{}, err } // We'll have to block. Register for notifications and keep trying to @@ -430,17 +430,17 @@ func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags res, err := rpcRecvMsg(t, req) if err == nil { n, e := dst.CopyOut(t, res.Data) - return int(n), res.Address.GetAddress(), res.Address.GetLength(), unix.ControlMessages{}, syserr.FromError(e) + return int(n), res.Address.GetAddress(), res.Address.GetLength(), socket.ControlMessages{}, syserr.FromError(e) } if err != syserr.ErrWouldBlock { - return 0, nil, 0, unix.ControlMessages{}, err + return 0, nil, 0, socket.ControlMessages{}, err } if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { if err == syserror.ETIMEDOUT { - return 0, nil, 0, unix.ControlMessages{}, syserr.ErrTryAgain + return 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain } - return 0, nil, 0, unix.ControlMessages{}, syserr.FromError(err) + return 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err) } } } @@ -459,14 +459,14 @@ func rpcSendMsg(t *kernel.Task, req *pb.SyscallRequest_Sendmsg) (uint32, *syserr } // SendMsg implements socket.Socket.SendMsg. -func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages unix.ControlMessages) (int, *syserr.Error) { +func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages socket.ControlMessages) (int, *syserr.Error) { // Whitelist flags. if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_EOR|syscall.MSG_FASTOPEN|syscall.MSG_MORE|syscall.MSG_NOSIGNAL) != 0 { return 0, syserr.ErrInvalidArgument } - // Reject control messages. - if !controlMessages.Empty() { + // Reject Unix control messages. + if !controlMessages.Unix.Empty() { return 0, syserr.ErrInvalidArgument } diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index be3026bfa..bd4858a34 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -31,9 +31,17 @@ import ( ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" "gvisor.googlesource.com/gvisor/pkg/syserr" + "gvisor.googlesource.com/gvisor/pkg/tcpip" "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/unix" ) +// ControlMessages represents the union of unix control messages and tcpip +// control messages. +type ControlMessages struct { + Unix unix.ControlMessages + IP tcpip.ControlMessages +} + // Socket is the interface containing socket syscalls used by the syscall layer // to redirect them to the appropriate implementation. type Socket interface { @@ -78,11 +86,11 @@ type Socket interface { // // senderAddrLen is the address length to be returned to the application, // not necessarily the actual length of the address. - RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, senderAddr interface{}, senderAddrLen uint32, controlMessages unix.ControlMessages, err *syserr.Error) + RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, senderAddr interface{}, senderAddrLen uint32, controlMessages ControlMessages, err *syserr.Error) // SendMsg implements the sendmsg(2) linux syscall. SendMsg does not take // ownership of the ControlMessage on error. - SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages unix.ControlMessages) (n int, err *syserr.Error) + SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages ControlMessages) (n int, err *syserr.Error) // SetRecvTimeout sets the timeout (in ns) for recv operations. Zero means // no timeout. diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index a4b414851..f83156c8e 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -358,10 +358,10 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO // SendMsg implements the linux syscall sendmsg(2) for unix sockets backed by // a unix.Endpoint. -func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages unix.ControlMessages) (int, *syserr.Error) { +func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, controlMessages socket.ControlMessages) (int, *syserr.Error) { w := EndpointWriter{ Endpoint: s.ep, - Control: controlMessages, + Control: controlMessages.Unix, To: nil, } if len(to) > 0 { @@ -452,7 +452,7 @@ func (s *SocketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS // RecvMsg implements the linux syscall recvmsg(2) for sockets backed by // a unix.Endpoint. -func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, senderAddr interface{}, senderAddrLen uint32, controlMessages unix.ControlMessages, err *syserr.Error) { +func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, senderAddr interface{}, senderAddrLen uint32, controlMessages socket.ControlMessages, err *syserr.Error) { trunc := flags&linux.MSG_TRUNC != 0 peek := flags&linux.MSG_PEEK != 0 @@ -490,7 +490,7 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags if trunc { n = int64(r.MsgSize) } - return int(n), from, fromLen, r.Control, syserr.FromError(err) + return int(n), from, fromLen, socket.ControlMessages{Unix: r.Control}, syserr.FromError(err) } // We'll have to block. Register for notification and keep trying to @@ -509,14 +509,14 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags if trunc { n = int64(r.MsgSize) } - return int(n), from, fromLen, r.Control, syserr.FromError(err) + return int(n), from, fromLen, socket.ControlMessages{Unix: r.Control}, syserr.FromError(err) } if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { if err == syserror.ETIMEDOUT { - return 0, nil, 0, unix.ControlMessages{}, syserr.ErrTryAgain + return 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain } - return 0, nil, 0, unix.ControlMessages{}, syserr.FromError(err) + return 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err) } } } diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go index 48c072e96..1a2e8573e 100644 --- a/pkg/sentry/strace/socket.go +++ b/pkg/sentry/strace/socket.go @@ -440,6 +440,7 @@ var SocketProtocol = map[int32]abi.ValueSet{ var controlMessageType = map[int32]string{ linux.SCM_RIGHTS: "SCM_RIGHTS", linux.SCM_CREDENTIALS: "SCM_CREDENTIALS", + linux.SO_TIMESTAMP: "SO_TIMESTAMP", } func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64) string { @@ -477,7 +478,7 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64) typ = fmt.Sprint(h.Type) } - if h.Length > uint64(len(buf)-i) { + if h.Length > uint64(len(buf)-i+linux.SizeOfControlMessageHeader) { strs = append(strs, fmt.Sprintf( "{level=%s, type=%s, length=%d, content extends beyond buffer}", level, @@ -546,6 +547,32 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64) i += control.AlignUp(length, width) + case linux.SO_TIMESTAMP: + if length < linux.SizeOfTimeval { + strs = append(strs, fmt.Sprintf( + "{level=%s, type=%s, length=%d, content too short}", + level, + typ, + h.Length, + )) + i += control.AlignUp(length, width) + break + } + + var tv linux.Timeval + binary.Unmarshal(buf[i:i+linux.SizeOfTimeval], usermem.ByteOrder, &tv) + + strs = append(strs, fmt.Sprintf( + "{level=%s, type=%s, length=%d, Sec: %d, Usec: %d}", + level, + typ, + h.Length, + tv.Sec, + tv.Usec, + )) + + i += control.AlignUp(length, width) + default: panic("unreachable") } diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index 70c618398..6258a1539 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -731,10 +731,11 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i // Fast path when no control message nor name buffers are provided. if msg.ControlLen == 0 && msg.NameLen == 0 { - n, _, _, _, err := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, false, 0) + n, _, _, cms, err := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, false, 0) if err != nil { return 0, syserror.ConvertIntr(err.ToError(), kernel.ERESTARTSYS) } + cms.Unix.Release() return uintptr(n), nil } @@ -745,17 +746,21 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i if e != nil { return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS) } - defer cms.Release() + defer cms.Unix.Release() controlData := make([]byte, 0, msg.ControlLen) if cr, ok := s.(unix.Credentialer); ok && cr.Passcred() { - creds, _ := cms.Credentials.(control.SCMCredentials) + creds, _ := cms.Unix.Credentials.(control.SCMCredentials) controlData = control.PackCredentials(t, creds, controlData) } - if cms.Rights != nil { - controlData = control.PackRights(t, cms.Rights.(control.SCMRights), flags&linux.MSG_CMSG_CLOEXEC != 0, controlData) + if cms.IP.HasTimestamp { + controlData = control.PackTimestamp(t, cms.IP.Timestamp, controlData) + } + + if cms.Unix.Rights != nil { + controlData = control.PackRights(t, cms.Unix.Rights.(control.SCMRights), flags&linux.MSG_CMSG_CLOEXEC != 0, controlData) } // Copy the address to the caller. @@ -823,7 +828,7 @@ func recvFrom(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, f } n, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0) - cm.Release() + cm.Unix.Release() if e != nil { return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS) } @@ -997,7 +1002,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme } // Call the syscall implementation. - n, e := s.SendMsg(t, src, to, int(flags), controlMessages) + n, e := s.SendMsg(t, src, to, int(flags), socket.ControlMessages{Unix: controlMessages}) err = handleIOError(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendmsg", file) if err != nil { controlMessages.Release() @@ -1048,7 +1053,7 @@ func sendTo(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, fla } // Call the syscall implementation. - n, e := s.SendMsg(t, src, to, int(flags), control.New(t, s, nil)) + n, e := s.SendMsg(t, src, to, int(flags), socket.ControlMessages{Unix: control.New(t, s, nil)}) return uintptr(n), handleIOError(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendto", file) } diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go index 96a2d670d..5aa6b1aa2 100644 --- a/pkg/tcpip/adapters/gonet/gonet.go +++ b/pkg/tcpip/adapters/gonet/gonet.go @@ -268,7 +268,7 @@ type opErrorer interface { // commonRead implements the common logic between net.Conn.Read and // net.PacketConn.ReadFrom. func commonRead(ep tcpip.Endpoint, wq *waiter.Queue, deadline <-chan struct{}, addr *tcpip.FullAddress, errorer opErrorer) ([]byte, error) { - read, err := ep.Read(addr) + read, _, err := ep.Read(addr) if err == tcpip.ErrWouldBlock { // Create wait queue entry that notifies a channel. @@ -276,7 +276,7 @@ func commonRead(ep tcpip.Endpoint, wq *waiter.Queue, deadline <-chan struct{}, a wq.EventRegister(&waitEntry, waiter.EventIn) defer wq.EventUnregister(&waitEntry) for { - read, err = ep.Read(addr) + read, _, err = ep.Read(addr) if err != tcpip.ErrWouldBlock { break } diff --git a/pkg/tcpip/adapters/gonet/gonet_test.go b/pkg/tcpip/adapters/gonet/gonet_test.go index 2f86469eb..e3d0c6c84 100644 --- a/pkg/tcpip/adapters/gonet/gonet_test.go +++ b/pkg/tcpip/adapters/gonet/gonet_test.go @@ -47,7 +47,7 @@ func TestTimeouts(t *testing.T) { func newLoopbackStack() (*stack.Stack, *tcpip.Error) { // Create the stack and add a NIC. - s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{tcp.ProtocolName, udp.ProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{tcp.ProtocolName, udp.ProtocolName}) if err := s.CreateNIC(NICID, loopback.New()); err != nil { return nil, err diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go index 91ffdce4b..47b10e64e 100644 --- a/pkg/tcpip/network/arp/arp_test.go +++ b/pkg/tcpip/network/arp/arp_test.go @@ -32,7 +32,7 @@ type testContext struct { } func newTestContext(t *testing.T) *testContext { - s := stack.New([]string{ipv4.ProtocolName, arp.ProtocolName}, []string{ipv4.PingProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName, arp.ProtocolName}, []string{ipv4.PingProtocolName}) const defaultMTU = 65536 id, linkEP := channel.New(256, defaultMTU, stackLinkAddr) diff --git a/pkg/tcpip/network/ipv4/icmp_test.go b/pkg/tcpip/network/ipv4/icmp_test.go index 378fba74b..c55aa1835 100644 --- a/pkg/tcpip/network/ipv4/icmp_test.go +++ b/pkg/tcpip/network/ipv4/icmp_test.go @@ -26,7 +26,7 @@ type testContext struct { } func newTestContext(t *testing.T) *testContext { - s := stack.New([]string{ipv4.ProtocolName}, []string{ipv4.PingProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName}, []string{ipv4.PingProtocolName}) const defaultMTU = 65536 id, linkEP := channel.New(256, defaultMTU, "") diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go index 332929c85..ef5c7ec60 100644 --- a/pkg/tcpip/sample/tun_tcp_connect/main.go +++ b/pkg/tcpip/sample/tun_tcp_connect/main.go @@ -113,7 +113,7 @@ func main() { // Create the stack with ipv4 and tcp protocols, then add a tun-based // NIC and ipv4 address. - s := stack.New([]string{ipv4.ProtocolName}, []string{tcp.ProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName}, []string{tcp.ProtocolName}) mtu, err := rawfile.GetMTU(tunName) if err != nil { @@ -183,7 +183,7 @@ func main() { // connection from its side. wq.EventRegister(&waitEntry, waiter.EventIn) for { - v, err := ep.Read(nil) + v, _, err := ep.Read(nil) if err != nil { if err == tcpip.ErrClosedForReceive { break diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go index 10cd701af..8c166f643 100644 --- a/pkg/tcpip/sample/tun_tcp_echo/main.go +++ b/pkg/tcpip/sample/tun_tcp_echo/main.go @@ -42,7 +42,7 @@ func echo(wq *waiter.Queue, ep tcpip.Endpoint) { defer wq.EventUnregister(&waitEntry) for { - v, err := ep.Read(nil) + v, _, err := ep.Read(nil) if err != nil { if err == tcpip.ErrWouldBlock { <-notifyCh @@ -99,7 +99,7 @@ func main() { // Create the stack with ip and tcp protocols, then add a tun-based // NIC and address. - s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName, arp.ProtocolName}, []string{tcp.ProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName, ipv6.ProtocolName, arp.ProtocolName}, []string{tcp.ProtocolName}) mtu, err := rawfile.GetMTU(tunName) if err != nil { diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 558ecdb72..b480bf812 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -270,6 +270,9 @@ type Stack struct { // If not nil, then any new endpoints will have this probe function // invoked everytime they receive a TCP segment. tcpProbeFunc TCPProbeFunc + + // clock is used to generate user-visible times. + clock tcpip.Clock } // New allocates a new networking stack with only the requested networking and @@ -279,7 +282,7 @@ type Stack struct { // SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the // stack. Please refer to individual protocol implementations as to what options // are supported. -func New(network []string, transport []string) *Stack { +func New(clock tcpip.Clock, network []string, transport []string) *Stack { s := &Stack{ transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState), networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol), @@ -287,6 +290,7 @@ func New(network []string, transport []string) *Stack { nics: make(map[tcpip.NICID]*NIC), linkAddrCache: newLinkAddrCache(ageLimit, resolutionTimeout, resolutionAttempts), PortManager: ports.NewPortManager(), + clock: clock, } // Add specified network protocols. @@ -388,6 +392,11 @@ func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h f } } +// NowNanoseconds implements tcpip.Clock.NowNanoseconds. +func (s *Stack) NowNanoseconds() int64 { + return s.clock.NowNanoseconds() +} + // Stats returns a snapshot of the current stats. // // NOTE: The underlying stats are updated using atomic instructions as a result @@ -474,6 +483,12 @@ func (s *Stack) CreateDisabledNIC(id tcpip.NICID, linkEP tcpip.LinkEndpointID) * return s.createNIC(id, "", linkEP, false) } +// CreateDisabledNamedNIC is a combination of CreateNamedNIC and +// CreateDisabledNIC. +func (s *Stack) CreateDisabledNamedNIC(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID) *tcpip.Error { + return s.createNIC(id, name, linkEP, false) +} + // EnableNIC enables the given NIC so that the link-layer endpoint can start // delivering packets to it. func (s *Stack) EnableNIC(id tcpip.NICID) *tcpip.Error { diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index b416065d7..ea7dccdc2 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -176,7 +176,7 @@ func TestNetworkReceive(t *testing.T) { // Create a stack with the fake network protocol, one nic, and two // addresses attached to it: 1 & 2. id, linkEP := channel.New(10, defaultMTU, "") - s := stack.New([]string{"fakeNet"}, nil) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, nil) if err := s.CreateNIC(1, id); err != nil { t.Fatalf("CreateNIC failed: %v", err) } @@ -270,7 +270,7 @@ func TestNetworkSend(t *testing.T) { // address: 1. The route table sends all packets through the only // existing nic. id, linkEP := channel.New(10, defaultMTU, "") - s := stack.New([]string{"fakeNet"}, nil) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, nil) if err := s.CreateNIC(1, id); err != nil { t.Fatalf("NewNIC failed: %v", err) } @@ -292,7 +292,7 @@ func TestNetworkSendMultiRoute(t *testing.T) { // Create a stack with the fake network protocol, two nics, and two // addresses per nic, the first nic has odd address, the second one has // even addresses. - s := stack.New([]string{"fakeNet"}, nil) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, nil) id1, linkEP1 := channel.New(10, defaultMTU, "") if err := s.CreateNIC(1, id1); err != nil { @@ -371,7 +371,7 @@ func TestRoutes(t *testing.T) { // Create a stack with the fake network protocol, two nics, and two // addresses per nic, the first nic has odd address, the second one has // even addresses. - s := stack.New([]string{"fakeNet"}, nil) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, nil) id1, _ := channel.New(10, defaultMTU, "") if err := s.CreateNIC(1, id1); err != nil { @@ -435,7 +435,7 @@ func TestRoutes(t *testing.T) { } func TestAddressRemoval(t *testing.T) { - s := stack.New([]string{"fakeNet"}, nil) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, nil) id, linkEP := channel.New(10, defaultMTU, "") if err := s.CreateNIC(1, id); err != nil { @@ -479,7 +479,7 @@ func TestAddressRemoval(t *testing.T) { } func TestDelayedRemovalDueToRoute(t *testing.T) { - s := stack.New([]string{"fakeNet"}, nil) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, nil) id, linkEP := channel.New(10, defaultMTU, "") if err := s.CreateNIC(1, id); err != nil { @@ -547,7 +547,7 @@ func TestDelayedRemovalDueToRoute(t *testing.T) { } func TestPromiscuousMode(t *testing.T) { - s := stack.New([]string{"fakeNet"}, nil) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, nil) id, linkEP := channel.New(10, defaultMTU, "") if err := s.CreateNIC(1, id); err != nil { @@ -607,7 +607,7 @@ func TestAddressSpoofing(t *testing.T) { srcAddr := tcpip.Address("\x01") dstAddr := tcpip.Address("\x02") - s := stack.New([]string{"fakeNet"}, nil) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, nil) id, _ := channel.New(10, defaultMTU, "") if err := s.CreateNIC(1, id); err != nil { @@ -648,7 +648,7 @@ func TestAddressSpoofing(t *testing.T) { // Set the subnet, then check that packet is delivered. func TestSubnetAcceptsMatchingPacket(t *testing.T) { - s := stack.New([]string{"fakeNet"}, nil) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, nil) id, linkEP := channel.New(10, defaultMTU, "") if err := s.CreateNIC(1, id); err != nil { @@ -682,7 +682,7 @@ func TestSubnetAcceptsMatchingPacket(t *testing.T) { // Set destination outside the subnet, then check it doesn't get delivered. func TestSubnetRejectsNonmatchingPacket(t *testing.T) { - s := stack.New([]string{"fakeNet"}, nil) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, nil) id, linkEP := channel.New(10, defaultMTU, "") if err := s.CreateNIC(1, id); err != nil { @@ -714,7 +714,7 @@ func TestSubnetRejectsNonmatchingPacket(t *testing.T) { } func TestNetworkOptions(t *testing.T) { - s := stack.New([]string{"fakeNet"}, []string{}) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, []string{}) // Try an unsupported network protocol. if err := s.SetNetworkProtocolOption(tcpip.NetworkProtocolNumber(99999), fakeNetGoodOption(false)); err != tcpip.ErrUnknownProtocol { diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go index 7e072e96e..b870ab375 100644 --- a/pkg/tcpip/stack/transport_test.go +++ b/pkg/tcpip/stack/transport_test.go @@ -46,8 +46,8 @@ func (*fakeTransportEndpoint) Readiness(mask waiter.EventMask) waiter.EventMask return mask } -func (*fakeTransportEndpoint) Read(*tcpip.FullAddress) (buffer.View, *tcpip.Error) { - return buffer.View{}, nil +func (*fakeTransportEndpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { + return buffer.View{}, tcpip.ControlMessages{}, nil } func (f *fakeTransportEndpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, *tcpip.Error) { @@ -67,8 +67,8 @@ func (f *fakeTransportEndpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) return uintptr(len(v)), nil } -func (f *fakeTransportEndpoint) Peek([][]byte) (uintptr, *tcpip.Error) { - return 0, nil +func (f *fakeTransportEndpoint) Peek([][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) { + return 0, tcpip.ControlMessages{}, nil } // SetSockOpt sets a socket option. Currently not supported. @@ -210,7 +210,7 @@ func (f *fakeTransportProtocol) Option(option interface{}) *tcpip.Error { func TestTransportReceive(t *testing.T) { id, linkEP := channel.New(10, defaultMTU, "") - s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, []string{"fakeTrans"}) if err := s.CreateNIC(1, id); err != nil { t.Fatalf("CreateNIC failed: %v", err) } @@ -270,7 +270,7 @@ func TestTransportReceive(t *testing.T) { func TestTransportControlReceive(t *testing.T) { id, linkEP := channel.New(10, defaultMTU, "") - s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, []string{"fakeTrans"}) if err := s.CreateNIC(1, id); err != nil { t.Fatalf("CreateNIC failed: %v", err) } @@ -336,7 +336,7 @@ func TestTransportControlReceive(t *testing.T) { func TestTransportSend(t *testing.T) { id, _ := channel.New(10, defaultMTU, "") - s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, []string{"fakeTrans"}) if err := s.CreateNIC(1, id); err != nil { t.Fatalf("CreateNIC failed: %v", err) } @@ -373,7 +373,7 @@ func TestTransportSend(t *testing.T) { } func TestTransportOptions(t *testing.T) { - s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}) + s := stack.New(&tcpip.StdClock{}, []string{"fakeNet"}, []string{"fakeTrans"}) // Try an unsupported transport protocol. if err := s.SetTransportProtocolOption(tcpip.TransportProtocolNumber(99999), fakeTransportGoodOption(false)); err != tcpip.ErrUnknownProtocol { diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index f3a94f353..f9df1d989 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -23,6 +23,7 @@ import ( "fmt" "strconv" "strings" + "time" "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" "gvisor.googlesource.com/gvisor/pkg/waiter" @@ -80,6 +81,24 @@ var ( errSubnetAddressMasked = errors.New("subnet address has bits set outside the mask") ) +// A Clock provides the current time. +// +// Times returned by a Clock should always be used for application-visible +// time, but never for netstack internal timekeeping. +type Clock interface { + // NowNanoseconds returns the current real time as a number of + // nanoseconds since some epoch. + NowNanoseconds() int64 +} + +// StdClock implements Clock with the time package. +type StdClock struct{} + +// NowNanoseconds implements Clock.NowNanoseconds. +func (*StdClock) NowNanoseconds() int64 { + return time.Now().UnixNano() +} + // Address is a byte slice cast as a string that represents the address of a // network node. Or, in the case of unix endpoints, it may represent a path. type Address string @@ -210,6 +229,16 @@ func (s SlicePayload) Size() int { return len(s) } +// A ControlMessages contains socket control messages for IP sockets. +type ControlMessages struct { + // HasTimestamp indicates whether Timestamp is valid/set. + HasTimestamp bool + + // Timestamp is the time (in ns) that the last packed used to create + // the read data was received. + Timestamp int64 +} + // Endpoint is the interface implemented by transport protocols (e.g., tcp, udp) // that exposes functionality like read, write, connect, etc. to users of the // networking stack. @@ -219,9 +248,13 @@ type Endpoint interface { Close() // Read reads data from the endpoint and optionally returns the sender. - // This method does not block if there is no data pending. - // It will also either return an error or data, never both. - Read(*FullAddress) (buffer.View, *Error) + // + // This method does not block if there is no data pending. It will also + // either return an error or data, never both. + // + // A timestamp (in ns) is optionally returned. A zero value indicates + // that no timestamp was available. + Read(*FullAddress) (buffer.View, ControlMessages, *Error) // Write writes data to the endpoint's peer. This method does not block if // the data cannot be written. @@ -238,7 +271,10 @@ type Endpoint interface { // Peek reads data without consuming it from the endpoint. // // This method does not block if there is no data pending. - Peek([][]byte) (uintptr, *Error) + // + // A timestamp (in ns) is optionally returned. A zero value indicates + // that no timestamp was available. + Peek([][]byte) (uintptr, ControlMessages, *Error) // Connect connects the endpoint to its peer. Specifying a NIC is // optional. @@ -347,6 +383,10 @@ type ReuseAddressOption int // Only supported on Unix sockets. type PasscredOption int +// TimestampOption is used by SetSockOpt/GetSockOpt to specify whether +// SO_TIMESTAMP socket control messages are enabled. +type TimestampOption int + // TCPInfoOption is used by GetSockOpt to expose TCP statistics. // // TODO: Add and populate stat fields. diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 5d62589d8..d84171b0c 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -374,7 +374,7 @@ func (e *endpoint) cleanup() { } // Read reads data from the endpoint. -func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, *tcpip.Error) { +func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { e.mu.RLock() // The endpoint can be read if it's connected, or if it's already closed // but has some pending unread data. Also note that a RST being received @@ -383,9 +383,9 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, *tcpip.Error) { if s := e.state; s != stateConnected && s != stateClosed && e.rcvBufUsed == 0 { e.mu.RUnlock() if s == stateError { - return buffer.View{}, e.hardError + return buffer.View{}, tcpip.ControlMessages{}, e.hardError } - return buffer.View{}, tcpip.ErrInvalidEndpointState + return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrInvalidEndpointState } e.rcvListMu.Lock() @@ -394,7 +394,7 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, *tcpip.Error) { e.mu.RUnlock() - return v, err + return v, tcpip.ControlMessages{}, err } func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) { @@ -498,7 +498,7 @@ func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, *tc // Peek reads data without consuming it from the endpoint. // // This method does not block if there is no data pending. -func (e *endpoint) Peek(vec [][]byte) (uintptr, *tcpip.Error) { +func (e *endpoint) Peek(vec [][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) { e.mu.RLock() defer e.mu.RUnlock() @@ -506,9 +506,9 @@ func (e *endpoint) Peek(vec [][]byte) (uintptr, *tcpip.Error) { // but has some pending unread data. if s := e.state; s != stateConnected && s != stateClosed { if s == stateError { - return 0, e.hardError + return 0, tcpip.ControlMessages{}, e.hardError } - return 0, tcpip.ErrInvalidEndpointState + return 0, tcpip.ControlMessages{}, tcpip.ErrInvalidEndpointState } e.rcvListMu.Lock() @@ -516,9 +516,9 @@ func (e *endpoint) Peek(vec [][]byte) (uintptr, *tcpip.Error) { if e.rcvBufUsed == 0 { if e.rcvClosed || e.state != stateConnected { - return 0, tcpip.ErrClosedForReceive + return 0, tcpip.ControlMessages{}, tcpip.ErrClosedForReceive } - return 0, tcpip.ErrWouldBlock + return 0, tcpip.ControlMessages{}, tcpip.ErrWouldBlock } // Make a copy of vec so we can modify the slide headers. @@ -534,7 +534,7 @@ func (e *endpoint) Peek(vec [][]byte) (uintptr, *tcpip.Error) { for len(v) > 0 { if len(vec) == 0 { - return num, nil + return num, tcpip.ControlMessages{}, nil } if len(vec[0]) == 0 { vec = vec[1:] @@ -549,7 +549,7 @@ func (e *endpoint) Peek(vec [][]byte) (uintptr, *tcpip.Error) { } } - return num, nil + return num, tcpip.ControlMessages{}, nil } // zeroReceiveWindow checks if the receive window to be announced now would be diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 118d861ba..3c21a1ec3 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -147,7 +147,7 @@ func TestSimpleReceive(t *testing.T) { c.WQ.EventRegister(&we, waiter.EventIn) defer c.WQ.EventUnregister(&we) - if _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { + if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { t.Fatalf("Unexpected error from Read: %v", err) } @@ -169,7 +169,7 @@ func TestSimpleReceive(t *testing.T) { } // Receive data. - v, err := c.EP.Read(nil) + v, _, err := c.EP.Read(nil) if err != nil { t.Fatalf("Unexpected error from Read: %v", err) } @@ -199,7 +199,7 @@ func TestOutOfOrderReceive(t *testing.T) { c.WQ.EventRegister(&we, waiter.EventIn) defer c.WQ.EventUnregister(&we) - if _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { + if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { t.Fatalf("Unexpected error from Read: %v", err) } @@ -226,7 +226,7 @@ func TestOutOfOrderReceive(t *testing.T) { // Wait 200ms and check that no data has been received. time.Sleep(200 * time.Millisecond) - if _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { + if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { t.Fatalf("Unexpected error from Read: %v", err) } @@ -243,7 +243,7 @@ func TestOutOfOrderReceive(t *testing.T) { // Receive data. read := make([]byte, 0, 6) for len(read) < len(data) { - v, err := c.EP.Read(nil) + v, _, err := c.EP.Read(nil) if err != nil { if err == tcpip.ErrWouldBlock { // Wait for receive to be notified. @@ -284,7 +284,7 @@ func TestOutOfOrderFlood(t *testing.T) { opt := tcpip.ReceiveBufferSizeOption(10) c.CreateConnected(789, 30000, &opt) - if _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { + if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { t.Fatalf("Unexpected error from Read: %v", err) } @@ -361,7 +361,7 @@ func TestRstOnCloseWithUnreadData(t *testing.T) { c.WQ.EventRegister(&we, waiter.EventIn) defer c.WQ.EventUnregister(&we) - if _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { + if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { t.Fatalf("Unexpected error from Read: %v", err) } @@ -414,7 +414,7 @@ func TestFullWindowReceive(t *testing.T) { c.WQ.EventRegister(&we, waiter.EventIn) defer c.WQ.EventUnregister(&we) - _, err := c.EP.Read(nil) + _, _, err := c.EP.Read(nil) if err != tcpip.ErrWouldBlock { t.Fatalf("Unexpected error from Read: %v", err) } @@ -449,7 +449,7 @@ func TestFullWindowReceive(t *testing.T) { ) // Receive data and check it. - v, err := c.EP.Read(nil) + v, _, err := c.EP.Read(nil) if err != nil { t.Fatalf("Unexpected error from Read: %v", err) } @@ -487,7 +487,7 @@ func TestNoWindowShrinking(t *testing.T) { c.WQ.EventRegister(&we, waiter.EventIn) defer c.WQ.EventUnregister(&we) - _, err := c.EP.Read(nil) + _, _, err := c.EP.Read(nil) if err != tcpip.ErrWouldBlock { t.Fatalf("Unexpected error from Read: %v", err) } @@ -551,7 +551,7 @@ func TestNoWindowShrinking(t *testing.T) { // Receive data and check it. read := make([]byte, 0, 10) for len(read) < len(data) { - v, err := c.EP.Read(nil) + v, _, err := c.EP.Read(nil) if err != nil { t.Fatalf("Unexpected error from Read: %v", err) } @@ -954,7 +954,7 @@ func TestZeroScaledWindowReceive(t *testing.T) { } // Read some data. An ack should be sent in response to that. - v, err := c.EP.Read(nil) + v, _, err := c.EP.Read(nil) if err != nil { t.Fatalf("Unexpected error from Read: %v", err) } @@ -1337,7 +1337,7 @@ func TestReceiveOnResetConnection(t *testing.T) { loop: for { - switch _, err := c.EP.Read(nil); err { + switch _, _, err := c.EP.Read(nil); err { case nil: t.Fatalf("Unexpected success.") case tcpip.ErrWouldBlock: @@ -2293,7 +2293,7 @@ func TestReadAfterClosedState(t *testing.T) { c.WQ.EventRegister(&we, waiter.EventIn) defer c.WQ.EventUnregister(&we) - if _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { + if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { t.Fatalf("Unexpected error from Read: %v", err) } @@ -2345,7 +2345,7 @@ func TestReadAfterClosedState(t *testing.T) { // Check that peek works. peekBuf := make([]byte, 10) - n, err := c.EP.Peek([][]byte{peekBuf}) + n, _, err := c.EP.Peek([][]byte{peekBuf}) if err != nil { t.Fatalf("Unexpected error from Peek: %v", err) } @@ -2356,7 +2356,7 @@ func TestReadAfterClosedState(t *testing.T) { } // Receive data. - v, err := c.EP.Read(nil) + v, _, err := c.EP.Read(nil) if err != nil { t.Fatalf("Unexpected error from Read: %v", err) } @@ -2367,11 +2367,11 @@ func TestReadAfterClosedState(t *testing.T) { // Now that we drained the queue, check that functions fail with the // right error code. - if _, err := c.EP.Read(nil); err != tcpip.ErrClosedForReceive { + if _, _, err := c.EP.Read(nil); err != tcpip.ErrClosedForReceive { t.Fatalf("Unexpected return from Read: got %v, want %v", err, tcpip.ErrClosedForReceive) } - if _, err := c.EP.Peek([][]byte{peekBuf}); err != tcpip.ErrClosedForReceive { + if _, _, err := c.EP.Peek([][]byte{peekBuf}); err != tcpip.ErrClosedForReceive { t.Fatalf("Unexpected return from Peek: got %v, want %v", err, tcpip.ErrClosedForReceive) } } @@ -2479,7 +2479,7 @@ func checkSendBufferSize(t *testing.T, ep tcpip.Endpoint, v int) { } func TestDefaultBufferSizes(t *testing.T) { - s := stack.New([]string{ipv4.ProtocolName}, []string{tcp.ProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName}, []string{tcp.ProtocolName}) // Check the default values. ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{}) @@ -2525,7 +2525,7 @@ func TestDefaultBufferSizes(t *testing.T) { } func TestMinMaxBufferSizes(t *testing.T) { - s := stack.New([]string{ipv4.ProtocolName}, []string{tcp.ProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName}, []string{tcp.ProtocolName}) // Check the default values. ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{}) @@ -2575,7 +2575,7 @@ func TestSelfConnect(t *testing.T) { // it checks that if an endpoint binds to say 127.0.0.1:1000 then // connects to 127.0.0.1:1000, then it will be connected to itself, and // is able to send and receive data through the same endpoint. - s := stack.New([]string{ipv4.ProtocolName}, []string{tcp.ProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName}, []string{tcp.ProtocolName}) id := loopback.New() if testing.Verbose() { @@ -2637,13 +2637,13 @@ func TestSelfConnect(t *testing.T) { // Read back what was written. wq.EventUnregister(&waitEntry) wq.EventRegister(&waitEntry, waiter.EventIn) - rd, err := ep.Read(nil) + rd, _, err := ep.Read(nil) if err != nil { if err != tcpip.ErrWouldBlock { t.Fatalf("Read failed: %v", err) } <-notifyCh - rd, err = ep.Read(nil) + rd, _, err = ep.Read(nil) if err != nil { t.Fatalf("Read failed: %v", err) } diff --git a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go index d12081bb7..335262e43 100644 --- a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go @@ -95,7 +95,7 @@ func TestTimeStampEnabledConnect(t *testing.T) { // There should be 5 views to read and each of them should // contain the same data. for i := 0; i < 5; i++ { - got, err := c.EP.Read(nil) + got, _, err := c.EP.Read(nil) if err != nil { t.Fatalf("Unexpected error from Read: %v", err) } @@ -296,7 +296,7 @@ func TestSegmentDropWhenTimestampMissing(t *testing.T) { } // Issue a read and we should data. - got, err := c.EP.Read(nil) + got, _, err := c.EP.Read(nil) if err != nil { t.Fatalf("Unexpected error from Read: %v", err) } diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go index 6a402d150..eb928553f 100644 --- a/pkg/tcpip/transport/tcp/testing/context/context.go +++ b/pkg/tcpip/transport/tcp/testing/context/context.go @@ -129,7 +129,7 @@ type Context struct { // New allocates and initializes a test context containing a new // stack and a link-layer endpoint. func New(t *testing.T, mtu uint32) *Context { - s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{tcp.ProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{tcp.ProtocolName}) // Allow minimum send/receive buffer sizes to be 1 during tests. if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultBufferSize, tcp.DefaultBufferSize * 10}); err != nil { diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 80fa88c4c..f86fc6d5a 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -19,6 +19,8 @@ type udpPacket struct { udpPacketEntry senderAddress tcpip.FullAddress data buffer.VectorisedView `state:".(buffer.VectorisedView)"` + timestamp int64 + hasTimestamp bool // views is used as buffer for data when its length is large // enough to store a VectorisedView. views [8]buffer.View `state:"nosave"` @@ -52,6 +54,7 @@ type endpoint struct { rcvBufSizeMax int `state:".(int)"` rcvBufSize int rcvClosed bool + rcvTimestamp bool // The following fields are protected by the mu mutex. mu sync.RWMutex `state:"nosave"` @@ -134,7 +137,7 @@ func (e *endpoint) Close() { // Read reads data from the endpoint. This method does not block if // there is no data pending. -func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, *tcpip.Error) { +func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { e.rcvMu.Lock() if e.rcvList.Empty() { @@ -143,12 +146,13 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, *tcpip.Error) { err = tcpip.ErrClosedForReceive } e.rcvMu.Unlock() - return buffer.View{}, err + return buffer.View{}, tcpip.ControlMessages{}, err } p := e.rcvList.Front() e.rcvList.Remove(p) e.rcvBufSize -= p.data.Size() + ts := e.rcvTimestamp e.rcvMu.Unlock() @@ -156,7 +160,12 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, *tcpip.Error) { *addr = p.senderAddress } - return p.data.ToView(), nil + if ts && !p.hasTimestamp { + // Linux uses the current time. + p.timestamp = e.stack.NowNanoseconds() + } + + return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: ts, Timestamp: p.timestamp}, nil } // prepareForWrite prepares the endpoint for sending data. In particular, it @@ -299,8 +308,8 @@ func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, *tc } // Peek only returns data from a single datagram, so do nothing here. -func (e *endpoint) Peek([][]byte) (uintptr, *tcpip.Error) { - return 0, nil +func (e *endpoint) Peek([][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) { + return 0, tcpip.ControlMessages{}, nil } // SetSockOpt sets a socket option. Currently not supported. @@ -322,6 +331,11 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { } e.v6only = v != 0 + + case tcpip.TimestampOption: + e.rcvMu.Lock() + e.rcvTimestamp = v != 0 + e.rcvMu.Unlock() } return nil } @@ -370,6 +384,14 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { } e.rcvMu.Unlock() return nil + + case *tcpip.TimestampOption: + e.rcvMu.Lock() + *o = 0 + if e.rcvTimestamp { + *o = 1 + } + e.rcvMu.Unlock() } return tcpip.ErrUnknownProtocolOption @@ -733,6 +755,11 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv e.rcvList.PushBack(pkt) e.rcvBufSize += vv.Size() + if e.rcvTimestamp { + pkt.timestamp = e.stack.NowNanoseconds() + pkt.hasTimestamp = true + } + e.rcvMu.Unlock() // Notify any waiters that there's data to be read now. diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index 65c567952..1eb9ecb80 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -56,7 +56,7 @@ type headers struct { } func newDualTestContext(t *testing.T, mtu uint32) *testContext { - s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{udp.ProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{udp.ProtocolName}) id, linkEP := channel.New(256, mtu, "") if testing.Verbose() { @@ -260,12 +260,12 @@ func testV4Read(c *testContext) { defer c.wq.EventUnregister(&we) var addr tcpip.FullAddress - v, err := c.ep.Read(&addr) + v, _, err := c.ep.Read(&addr) if err == tcpip.ErrWouldBlock { // Wait for data to become available. select { case <-ch: - v, err = c.ep.Read(&addr) + v, _, err = c.ep.Read(&addr) if err != nil { c.t.Fatalf("Read failed: %v", err) } @@ -355,12 +355,12 @@ func TestV6ReadOnV6(t *testing.T) { defer c.wq.EventUnregister(&we) var addr tcpip.FullAddress - v, err := c.ep.Read(&addr) + v, _, err := c.ep.Read(&addr) if err == tcpip.ErrWouldBlock { // Wait for data to become available. select { case <-ch: - v, err = c.ep.Read(&addr) + v, _, err = c.ep.Read(&addr) if err != nil { c.t.Fatalf("Read failed: %v", err) } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index a470cb054..d63a9028e 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -37,6 +37,7 @@ import ( slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux" "gvisor.googlesource.com/gvisor/pkg/sentry/time" "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" + "gvisor.googlesource.com/gvisor/pkg/tcpip" "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" @@ -177,7 +178,7 @@ func New(spec *specs.Spec, conf *Config, controllerFD int, ioFDs []int, console // this point. Netns is configured before Run() is called. Netstack is // configured using a control uRPC message. Host network is configured inside // Run(). - networkStack := newEmptyNetworkStack(conf) + networkStack := newEmptyNetworkStack(conf, k) // Initiate the Kernel object, which is required by the Context passed // to createVFS in order to mount (among other things) procfs. @@ -337,7 +338,7 @@ func (l *Loader) WaitExit() kernel.ExitStatus { return l.k.GlobalInit().ExitStatus() } -func newEmptyNetworkStack(conf *Config) inet.Stack { +func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) inet.Stack { switch conf.Network { case NetworkHost: return hostinet.NewStack() @@ -346,7 +347,7 @@ func newEmptyNetworkStack(conf *Config) inet.Stack { // NetworkNone sets up loopback using netstack. netProtos := []string{ipv4.ProtocolName, ipv6.ProtocolName, arp.ProtocolName} protoNames := []string{tcp.ProtocolName, udp.ProtocolName} - return &epsocket.Stack{stack.New(netProtos, protoNames)} + return &epsocket.Stack{stack.New(clock, netProtos, protoNames)} default: panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) |