diff options
author | Dean Deng <deandeng@google.com> | 2020-05-01 12:53:15 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2020-05-01 12:54:41 -0700 |
commit | 82bae30ceea0984c94af3085866b58ec9e69ea67 (patch) | |
tree | 8ed93228f9a3d6ec0d5333dc7f6649570dffe427 /pkg/sentry/socket/netlink | |
parent | 8962b7840f577b15d526e360077eefdb5c6373ce (diff) |
Port netstack, hostinet, and netlink sockets to VFS2.
All three follow the same pattern:
1. Refactor VFS1 sockets into socketOpsCommon, so that most of the methods can
be shared with VFS2.
2. Create a FileDescriptionImpl with the corresponding socket operations,
rewriting the few that cannot be shared with VFS1.
3. Set up a VFS2 socket provider that creates a socket by setting up a dentry
in the global Kernel.socketMount and connecting it with a new
FileDescription.
This mostly completes the work for porting sockets to VFS2, and many syscall
tests can be enabled as a result.
There are several networking-related syscall tests that are still not passing:
1. net gofer tests
2. socketpair gofer tests
2. sendfile tests (splice is not implemented in VFS2 yet)
Updates #1478, #1484, #1485
PiperOrigin-RevId: 309457331
Diffstat (limited to 'pkg/sentry/socket/netlink')
-rw-r--r-- | pkg/sentry/socket/netlink/BUILD | 5 | ||||
-rw-r--r-- | pkg/sentry/socket/netlink/provider.go | 5 | ||||
-rw-r--r-- | pkg/sentry/socket/netlink/provider_vfs2.go | 71 | ||||
-rw-r--r-- | pkg/sentry/socket/netlink/socket.go | 72 | ||||
-rw-r--r-- | pkg/sentry/socket/netlink/socket_vfs2.go | 138 |
5 files changed, 262 insertions, 29 deletions
diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 1911cd9b8..09ca00a4a 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -7,7 +7,9 @@ go_library( srcs = [ "message.go", "provider.go", + "provider_vfs2.go", "socket.go", + "socket_vfs2.go", ], visibility = ["//pkg/sentry:internal"], deps = [ @@ -18,6 +20,8 @@ go_library( "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fsimpl/kernfs", + "//pkg/sentry/fsimpl/sockfs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/time", @@ -25,6 +29,7 @@ go_library( "//pkg/sentry/socket/netlink/port", "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", + "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", diff --git a/pkg/sentry/socket/netlink/provider.go b/pkg/sentry/socket/netlink/provider.go index b0dc70e5c..0d45e5053 100644 --- a/pkg/sentry/socket/netlink/provider.go +++ b/pkg/sentry/socket/netlink/provider.go @@ -67,6 +67,8 @@ func RegisterProvider(protocol int, provider Provider) { protocols[protocol] = provider } +// LINT.IfChange + // socketProvider implements socket.Provider. type socketProvider struct { } @@ -105,7 +107,10 @@ func (*socketProvider) Pair(*kernel.Task, linux.SockType, int) (*fs.File, *fs.Fi return nil, nil, syserr.ErrNotSupported } +// LINT.ThenChange(./provider_vfs2.go) + // init registers the socket provider. func init() { socket.RegisterProvider(linux.AF_NETLINK, &socketProvider{}) + socket.RegisterProviderVFS2(linux.AF_NETLINK, &socketProviderVFS2{}) } diff --git a/pkg/sentry/socket/netlink/provider_vfs2.go b/pkg/sentry/socket/netlink/provider_vfs2.go new file mode 100644 index 000000000..dcd92b5cd --- /dev/null +++ b/pkg/sentry/socket/netlink/provider_vfs2.go @@ -0,0 +1,71 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package netlink + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserr" +) + +// socketProviderVFS2 implements socket.Provider. +type socketProviderVFS2 struct { +} + +// Socket implements socket.Provider.Socket. +func (*socketProviderVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error) { + // Netlink sockets must be specified as datagram or raw, but they + // behave the same regardless of type. + if stype != linux.SOCK_DGRAM && stype != linux.SOCK_RAW { + return nil, syserr.ErrSocketNotSupported + } + + provider, ok := protocols[protocol] + if !ok { + return nil, syserr.ErrProtocolNotSupported + } + + p, err := provider(t) + if err != nil { + return nil, err + } + + s, err := NewVFS2(t, stype, p) + if err != nil { + return nil, err + } + + vfsfd := &s.vfsfd + mnt := t.Kernel().SocketMount() + fs := mnt.Filesystem().Impl().(*kernfs.Filesystem) + d := sockfs.NewDentry(t.Credentials(), fs.NextIno()) + if err := vfsfd.Init(s, linux.O_RDWR, mnt, d, &vfs.FileDescriptionOptions{ + DenyPRead: true, + DenyPWrite: true, + UseDentryMetadata: true, + }); err != nil { + return nil, syserr.FromError(err) + } + return vfsfd, nil +} + +// Pair implements socket.Provider.Pair by returning an error. +func (*socketProviderVFS2) Pair(*kernel.Task, linux.SockType, int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error) { + // Netlink sockets never supports creating socket pairs. + return nil, nil, syserr.ErrNotSupported +} diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index 2ca02567d..81f34c5a2 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -58,6 +58,8 @@ var errNoFilter = syserr.New("no filter attached", linux.ENOENT) // netlinkSocketDevice is the netlink socket virtual device. var netlinkSocketDevice = device.NewAnonDevice() +// LINT.IfChange + // Socket is the base socket type for netlink sockets. // // This implementation only supports userspace sending and receiving messages @@ -74,6 +76,14 @@ type Socket struct { fsutil.FileNoSplice `state:"nosave"` fsutil.FileNoopFlush `state:"nosave"` fsutil.FileUseInodeUnstableAttr `state:"nosave"` + + socketOpsCommon +} + +// socketOpsCommon contains the socket operations common to VFS1 and VFS2. +// +// +stateify savable +type socketOpsCommon struct { socket.SendReceiveTimeout // ports provides netlink port allocation. @@ -140,17 +150,19 @@ func NewSocket(t *kernel.Task, skType linux.SockType, protocol Protocol) (*Socke } return &Socket{ - ports: t.Kernel().NetlinkPorts(), - protocol: protocol, - skType: skType, - ep: ep, - connection: connection, - sendBufferSize: defaultSendBufferSize, + socketOpsCommon: socketOpsCommon{ + ports: t.Kernel().NetlinkPorts(), + protocol: protocol, + skType: skType, + ep: ep, + connection: connection, + sendBufferSize: defaultSendBufferSize, + }, }, nil } // Release implements fs.FileOperations.Release. -func (s *Socket) Release() { +func (s *socketOpsCommon) Release() { s.connection.Release() s.ep.Close() @@ -160,7 +172,7 @@ func (s *Socket) Release() { } // Readiness implements waiter.Waitable.Readiness. -func (s *Socket) Readiness(mask waiter.EventMask) waiter.EventMask { +func (s *socketOpsCommon) Readiness(mask waiter.EventMask) waiter.EventMask { // ep holds messages to be read and thus handles EventIn readiness. ready := s.ep.Readiness(mask) @@ -174,18 +186,18 @@ func (s *Socket) Readiness(mask waiter.EventMask) waiter.EventMask { } // EventRegister implements waiter.Waitable.EventRegister. -func (s *Socket) EventRegister(e *waiter.Entry, mask waiter.EventMask) { +func (s *socketOpsCommon) EventRegister(e *waiter.Entry, mask waiter.EventMask) { s.ep.EventRegister(e, mask) // Writable readiness never changes, so no registration is needed. } // EventUnregister implements waiter.Waitable.EventUnregister. -func (s *Socket) EventUnregister(e *waiter.Entry) { +func (s *socketOpsCommon) EventUnregister(e *waiter.Entry) { s.ep.EventUnregister(e) } // Passcred implements transport.Credentialer.Passcred. -func (s *Socket) Passcred() bool { +func (s *socketOpsCommon) Passcred() bool { s.mu.Lock() passcred := s.passcred s.mu.Unlock() @@ -193,7 +205,7 @@ func (s *Socket) Passcred() bool { } // ConnectedPasscred implements transport.Credentialer.ConnectedPasscred. -func (s *Socket) ConnectedPasscred() bool { +func (s *socketOpsCommon) ConnectedPasscred() bool { // This socket is connected to the kernel, which doesn't need creds. // // This is arbitrary, as ConnectedPasscred on this type has no callers. @@ -227,7 +239,7 @@ func ExtractSockAddr(b []byte) (*linux.SockAddrNetlink, *syserr.Error) { // port of 0 defaults to the ThreadGroup ID. // // Preconditions: mu is held. -func (s *Socket) bindPort(t *kernel.Task, port int32) *syserr.Error { +func (s *socketOpsCommon) bindPort(t *kernel.Task, port int32) *syserr.Error { if s.bound { // Re-binding is only allowed if the port doesn't change. if port != s.portID { @@ -251,7 +263,7 @@ func (s *Socket) bindPort(t *kernel.Task, port int32) *syserr.Error { } // Bind implements socket.Socket.Bind. -func (s *Socket) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { +func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { a, err := ExtractSockAddr(sockaddr) if err != nil { return err @@ -269,7 +281,7 @@ func (s *Socket) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { } // Connect implements socket.Socket.Connect. -func (s *Socket) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error { +func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error { a, err := ExtractSockAddr(sockaddr) if err != nil { return err @@ -300,25 +312,25 @@ func (s *Socket) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr } // Accept implements socket.Socket.Accept. -func (s *Socket) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) { +func (s *socketOpsCommon) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) { // Netlink sockets never support accept. return 0, nil, 0, syserr.ErrNotSupported } // Listen implements socket.Socket.Listen. -func (s *Socket) Listen(t *kernel.Task, backlog int) *syserr.Error { +func (s *socketOpsCommon) Listen(t *kernel.Task, backlog int) *syserr.Error { // Netlink sockets never support listen. return syserr.ErrNotSupported } // Shutdown implements socket.Socket.Shutdown. -func (s *Socket) Shutdown(t *kernel.Task, how int) *syserr.Error { +func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error { // Netlink sockets never support shutdown. return syserr.ErrNotSupported } // GetSockOpt implements socket.Socket.GetSockOpt. -func (s *Socket) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) { +func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) { switch level { case linux.SOL_SOCKET: switch name { @@ -369,7 +381,7 @@ func (s *Socket) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem. } // SetSockOpt implements socket.Socket.SetSockOpt. -func (s *Socket) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error { +func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error { switch level { case linux.SOL_SOCKET: switch name { @@ -466,7 +478,7 @@ func (s *Socket) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *sy } // GetSockName implements socket.Socket.GetSockName. -func (s *Socket) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) { +func (s *socketOpsCommon) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) { s.mu.Lock() defer s.mu.Unlock() @@ -478,7 +490,7 @@ func (s *Socket) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Er } // GetPeerName implements socket.Socket.GetPeerName. -func (s *Socket) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) { +func (s *socketOpsCommon) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) { sa := &linux.SockAddrNetlink{ Family: linux.AF_NETLINK, // TODO(b/68878065): Support non-kernel peers. For now the peer @@ -489,7 +501,7 @@ func (s *Socket) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Er } // RecvMsg implements socket.Socket.RecvMsg. -func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) { +func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) { from := &linux.SockAddrNetlink{ Family: linux.AF_NETLINK, PortID: 0, @@ -590,7 +602,7 @@ func (kernelSCM) Credentials(*kernel.Task) (kernel.ThreadID, auth.UID, auth.GID) var kernelCreds = &kernelSCM{} // sendResponse sends the response messages in ms back to userspace. -func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error { +func (s *socketOpsCommon) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error { // Linux combines multiple netlink messages into a single datagram. bufs := make([][]byte, 0, len(ms.Messages)) for _, m := range ms.Messages { @@ -666,7 +678,7 @@ func dumpAckMesage(hdr linux.NetlinkMessageHeader, ms *MessageSet) { // processMessages handles each message in buf, passing it to the protocol // handler for final handling. -func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error { +func (s *socketOpsCommon) processMessages(ctx context.Context, buf []byte) *syserr.Error { for len(buf) > 0 { msg, rest, ok := ParseMessage(buf) if !ok { @@ -698,7 +710,7 @@ func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error } // sendMsg is the core of message send, used for SendMsg and Write. -func (s *Socket) sendMsg(ctx context.Context, src usermem.IOSequence, to []byte, flags int, controlMessages socket.ControlMessages) (int, *syserr.Error) { +func (s *socketOpsCommon) sendMsg(ctx context.Context, src usermem.IOSequence, to []byte, flags int, controlMessages socket.ControlMessages) (int, *syserr.Error) { dstPort := int32(0) if len(to) != 0 { @@ -745,7 +757,7 @@ func (s *Socket) sendMsg(ctx context.Context, src usermem.IOSequence, to []byte, } // SendMsg implements socket.Socket.SendMsg. -func (s *Socket) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) { +func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) { return s.sendMsg(t, src, to, flags, controlMessages) } @@ -756,11 +768,13 @@ func (s *Socket) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, } // State implements socket.Socket.State. -func (s *Socket) State() uint32 { +func (s *socketOpsCommon) State() uint32 { return s.ep.State() } // Type implements socket.Socket.Type. -func (s *Socket) Type() (family int, skType linux.SockType, protocol int) { +func (s *socketOpsCommon) Type() (family int, skType linux.SockType, protocol int) { return linux.AF_NETLINK, s.skType, s.protocol.Protocol() } + +// LINT.ThenChange(./socket_vfs2.go) diff --git a/pkg/sentry/socket/netlink/socket_vfs2.go b/pkg/sentry/socket/netlink/socket_vfs2.go new file mode 100644 index 000000000..b854bf990 --- /dev/null +++ b/pkg/sentry/socket/netlink/socket_vfs2.go @@ -0,0 +1,138 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package netlink + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +// SocketVFS2 is the base VFS2 socket type for netlink sockets. +// +// This implementation only supports userspace sending and receiving messages +// to/from the kernel. +// +// SocketVFS2 implements socket.SocketVFS2 and transport.Credentialer. +type SocketVFS2 struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl + vfs.DentryMetadataFileDescriptionImpl + + socketOpsCommon +} + +var _ socket.SocketVFS2 = (*SocketVFS2)(nil) +var _ transport.Credentialer = (*SocketVFS2)(nil) + +// NewVFS2 creates a new SocketVFS2. +func NewVFS2(t *kernel.Task, skType linux.SockType, protocol Protocol) (*SocketVFS2, *syserr.Error) { + // Datagram endpoint used to buffer kernel -> user messages. + ep := transport.NewConnectionless(t) + + // Bind the endpoint for good measure so we can connect to it. The + // bound address will never be exposed. + if err := ep.Bind(tcpip.FullAddress{Addr: "dummy"}, nil); err != nil { + ep.Close() + return nil, err + } + + // Create a connection from which the kernel can write messages. + connection, err := ep.(transport.BoundEndpoint).UnidirectionalConnect(t) + if err != nil { + ep.Close() + return nil, err + } + + return &SocketVFS2{ + socketOpsCommon: socketOpsCommon{ + ports: t.Kernel().NetlinkPorts(), + protocol: protocol, + skType: skType, + ep: ep, + connection: connection, + sendBufferSize: defaultSendBufferSize, + }, + }, nil +} + +// Readiness implements waiter.Waitable.Readiness. +func (s *SocketVFS2) Readiness(mask waiter.EventMask) waiter.EventMask { + return s.socketOpsCommon.Readiness(mask) +} + +// EventRegister implements waiter.Waitable.EventRegister. +func (s *SocketVFS2) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + s.socketOpsCommon.EventRegister(e, mask) +} + +// EventUnregister implements waiter.Waitable.EventUnregister. +func (s *SocketVFS2) EventUnregister(e *waiter.Entry) { + s.socketOpsCommon.EventUnregister(e) +} + +// Ioctl implements vfs.FileDescriptionImpl. +func (*SocketVFS2) Ioctl(context.Context, usermem.IO, arch.SyscallArguments) (uintptr, error) { + // TODO(b/68878065): no ioctls supported. + return 0, syserror.ENOTTY +} + +// PRead implements vfs.FileDescriptionImpl. +func (s *SocketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + return 0, syserror.ESPIPE +} + +// Read implements vfs.FileDescriptionImpl. +func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + // All flags other than RWF_NOWAIT should be ignored. + // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. + if opts.Flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + if dst.NumBytes() == 0 { + return 0, nil + } + return dst.CopyOutFrom(ctx, &unix.EndpointReader{ + Endpoint: s.ep, + }) +} + +// PWrite implements vfs.FileDescriptionImpl. +func (s *SocketVFS2) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + return 0, syserror.ESPIPE +} + +// Write implements vfs.FileDescriptionImpl. +func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + // All flags other than RWF_NOWAIT should be ignored. + // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. + if opts.Flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + n, err := s.sendMsg(ctx, src, nil, 0, socket.ControlMessages{}) + return int64(n), err.ToError() +} |