diff options
Diffstat (limited to 'pkg/sentry/fs/host/socket.go')
-rw-r--r-- | pkg/sentry/fs/host/socket.go | 281 |
1 files changed, 141 insertions, 140 deletions
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go index 577e9e272..e454b6fe5 100644 --- a/pkg/sentry/fs/host/socket.go +++ b/pkg/sentry/fs/host/socket.go @@ -41,137 +41,6 @@ import ( // N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max). const maxSendBufferSize = 8 << 20 -// newSocket allocates a new unix socket with host endpoint. -func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error) { - ownedfd := orgfd - srfd := -1 - if saveable { - var err error - ownedfd, err = syscall.Dup(orgfd) - if err != nil { - return nil, err - } - srfd = orgfd - } - f := fd.New(ownedfd) - var q waiter.Queue - e, err := NewConnectedEndpoint(f, &q, "" /* path */) - if err != nil { - if saveable { - f.Close() - } else { - f.Release() - } - return nil, syserr.TranslateNetstackError(err).ToError() - } - - e.srfd = srfd - e.Init() - - ep := unix.NewExternal(e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e) - - return unixsocket.New(ctx, ep), nil -} - -// NewSocketWithDirent allocates a new unix socket with host endpoint. -// -// This is currently only used by unsaveable Gofer nodes. -// -// NewSocketWithDirent takes ownership of f on success. -func NewSocketWithDirent(ctx context.Context, d *fs.Dirent, f *fd.FD, flags fs.FileFlags) (*fs.File, error) { - f2 := fd.New(f.FD()) - var q waiter.Queue - e, err := NewConnectedEndpoint(f2, &q, "" /* path */) - if err != nil { - f2.Release() - return nil, syserr.TranslateNetstackError(err).ToError() - } - - // Take ownship of the FD. 
- f.Release() - - e.Init() - - ep := unix.NewExternal(e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e) - - return unixsocket.NewWithDirent(ctx, d, ep, flags), nil -} - -func sendMsg(fd int, data [][]byte, controlMessages unix.ControlMessages, maxlen int, truncate bool) (uintptr, *tcpip.Error) { - if !controlMessages.Empty() { - return 0, tcpip.ErrInvalidEndpointState - } - n, totalLen, err := fdWriteVec(fd, data, maxlen, truncate) - if n < totalLen && err == nil { - // The host only returns a short write if it would otherwise - // block (and only for stream sockets). - err = syserror.EAGAIN - } - return n, translateError(err) -} - -func recvMsg(fd int, data [][]byte, numRights uintptr, peek bool, addr *tcpip.FullAddress, maxlen int) (uintptr, uintptr, unix.ControlMessages, *tcpip.Error) { - var cm unet.ControlMessage - if numRights > 0 { - cm.EnableFDs(int(numRights)) - } - rl, ml, cl, rerr := fdReadVec(fd, data, []byte(cm), peek, maxlen) - if rl == 0 && rerr != nil { - return 0, 0, unix.ControlMessages{}, translateError(rerr) - } - - // Trim the control data if we received less than the full amount. - if cl < uint64(len(cm)) { - cm = cm[:cl] - } - - // Avoid extra allocations in the case where there isn't any control data. - if len(cm) == 0 { - return rl, ml, unix.ControlMessages{}, translateError(rerr) - } - - fds, err := cm.ExtractFDs() - if err != nil { - return 0, 0, unix.ControlMessages{}, translateError(err) - } - - if len(fds) == 0 { - return rl, ml, unix.ControlMessages{}, translateError(rerr) - } - return rl, ml, control.New(nil, nil, newSCMRights(fds)), translateError(rerr) -} - -// NewConnectedEndpoint creates a new ConnectedEndpoint backed by a host FD -// that will pretend to be bound at a given sentry path. -// -// The caller is responsible for calling Init(). Additionaly, Release needs to -// be called twice because host.ConnectedEndpoint is both a unix.Receiver and -// unix.ConnectedEndpoint. 
-func NewConnectedEndpoint(file *fd.FD, queue *waiter.Queue, path string) (*ConnectedEndpoint, *tcpip.Error) { - e := ConnectedEndpoint{ - path: path, - queue: queue, - file: file, - srfd: -1, - } - - if err := e.init(); err != nil { - return nil, err - } - - // AtomicRefCounters start off with a single reference. We need two. - e.ref.IncRef() - - return &e, nil -} - -// Init will do initialization required without holding other locks. -func (c *ConnectedEndpoint) Init() { - if err := fdnotifier.AddFD(int32(c.file.FD()), c.queue); err != nil { - panic(err) - } -} - // ConnectedEndpoint is a host FD backed implementation of // unix.ConnectedEndpoint and unix.Receiver. // @@ -249,6 +118,93 @@ func (c *ConnectedEndpoint) init() *tcpip.Error { return nil } +// NewConnectedEndpoint creates a new ConnectedEndpoint backed by a host FD +// that will pretend to be bound at a given sentry path. +// +// The caller is responsible for calling Init(). Additionally, Release needs to +// be called twice because ConnectedEndpoint is both a unix.Receiver and +// unix.ConnectedEndpoint. +func NewConnectedEndpoint(file *fd.FD, queue *waiter.Queue, path string) (*ConnectedEndpoint, *tcpip.Error) { + e := ConnectedEndpoint{ + path: path, + queue: queue, + file: file, + srfd: -1, + } + + if err := e.init(); err != nil { + return nil, err + } + + // AtomicRefCounters start off with a single reference. We need two. + e.ref.IncRef() + + return &e, nil +} + +// Init will do initialization required without holding other locks. +func (c *ConnectedEndpoint) Init() { + if err := fdnotifier.AddFD(int32(c.file.FD()), c.queue); err != nil { + panic(err) + } +} + +// NewSocketWithDirent allocates a new unix socket with host endpoint. +// +// This is currently only used by unsaveable Gofer nodes. +// +// NewSocketWithDirent takes ownership of f on success. 
+func NewSocketWithDirent(ctx context.Context, d *fs.Dirent, f *fd.FD, flags fs.FileFlags) (*fs.File, error) { + f2 := fd.New(f.FD()) + var q waiter.Queue + e, err := NewConnectedEndpoint(f2, &q, "" /* path */) + if err != nil { + f2.Release() + return nil, syserr.TranslateNetstackError(err).ToError() + } + + // Take ownership of the FD. + f.Release() + + e.Init() + + ep := unix.NewExternal(e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e) + + return unixsocket.NewWithDirent(ctx, d, ep, flags), nil +} + +// newSocket allocates a new unix socket with host endpoint. +func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error) { + ownedfd := orgfd + srfd := -1 + if saveable { + var err error + ownedfd, err = syscall.Dup(orgfd) + if err != nil { + return nil, err + } + srfd = orgfd + } + f := fd.New(ownedfd) + var q waiter.Queue + e, err := NewConnectedEndpoint(f, &q, "" /* path */) + if err != nil { + if saveable { + f.Close() + } else { + f.Release() + } + return nil, syserr.TranslateNetstackError(err).ToError() + } + + e.srfd = srfd + e.Init() + + ep := unix.NewExternal(e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e) + + return unixsocket.New(ctx, ep), nil +} + // Send implements unix.ConnectedEndpoint.Send. func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages unix.ControlMessages, from tcpip.FullAddress) (uintptr, bool, *tcpip.Error) { c.mu.RLock() @@ -257,14 +213,30 @@ func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages unix.ControlMess return 0, false, tcpip.ErrClosedForSend } + if !controlMessages.Empty() { + return 0, false, tcpip.ErrInvalidEndpointState + } + // Since stream sockets don't preserve message boundaries, we can write // only as much of the message as fits in the send buffer. 
truncate := c.stype == unix.SockStream - n, err := sendMsg(c.file.FD(), data, controlMessages, c.sndbuf, truncate) - // There is no need for the callee to call SendNotify because sendMsg uses - // the host's sendmsg(2) and the host kernel's queue. - return n, false, err + n, totalLen, err := fdWriteVec(c.file.FD(), data, c.sndbuf, truncate) + if n < totalLen && err == nil { + // The host only returns a short write if it would otherwise + // block (and only for stream sockets). + err = syserror.EAGAIN + } + if n > 0 && err != syserror.EAGAIN { + // The caller may need to block to send more data, but + // otherwise there isn't anything that can be done about an + // error with a partial write. + err = nil + } + + // There is no need for the callee to call SendNotify because fdWriteVec + // uses the host's sendmsg(2) and the host kernel's queue. + return n, false, translateError(err) } // SendNotify implements unix.ConnectedEndpoint.SendNotify. @@ -318,17 +290,46 @@ func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights uintptr, p return 0, 0, unix.ControlMessages{}, tcpip.FullAddress{}, false, tcpip.ErrClosedForReceive } + var cm unet.ControlMessage + if numRights > 0 { + cm.EnableFDs(int(numRights)) + } + // N.B. Unix sockets don't have a receive buffer, the send buffer // serves both purposes. - rl, ml, cm, err := recvMsg(c.file.FD(), data, numRights, peek, nil, c.sndbuf) - if rl > 0 && err == tcpip.ErrWouldBlock { - // Message did not fill buffer; that's fine, no need to block. + rl, ml, cl, err := fdReadVec(c.file.FD(), data, []byte(cm), peek, c.sndbuf) + if rl > 0 && err != nil { + // We got some data, so all we need to do on error is return + // the data that we got. Short reads are fine, no need to + // block. 
err = nil } + if err != nil { + return 0, 0, unix.ControlMessages{}, tcpip.FullAddress{}, false, translateError(err) + } - // There is no need for the callee to call RecvNotify because recvMsg uses + // There is no need for the callee to call RecvNotify because fdReadVec uses // the host's recvmsg(2) and the host kernel's queue. - return rl, ml, cm, tcpip.FullAddress{Addr: tcpip.Address(c.path)}, false, err + + // Trim the control data if we received less than the full amount. + if cl < uint64(len(cm)) { + cm = cm[:cl] + } + + // Avoid extra allocations in the case where there isn't any control data. + if len(cm) == 0 { + return rl, ml, unix.ControlMessages{}, tcpip.FullAddress{Addr: tcpip.Address(c.path)}, false, nil + } + + fds, err := cm.ExtractFDs() + if err != nil { + return 0, 0, unix.ControlMessages{}, tcpip.FullAddress{}, false, translateError(err) + } + + if len(fds) == 0 { + return rl, ml, unix.ControlMessages{}, tcpip.FullAddress{Addr: tcpip.Address(c.path)}, false, nil + } + return rl, ml, control.New(nil, nil, newSCMRights(fds)), tcpip.FullAddress{Addr: tcpip.Address(c.path)}, false, nil } // close releases all resources related to the endpoint. |