summary | refs | log | tree | commit | diff | homepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- pkg/sentry/fs/host/file.go 13
-rw-r--r-- pkg/sentry/fs/host/inode.go 7
-rw-r--r-- pkg/sentry/fs/host/socket.go 16
-rw-r--r-- pkg/sentry/fsimpl/gofer/gofer.go 8
-rw-r--r-- pkg/sentry/fsimpl/host/socket.go 18
-rw-r--r-- pkg/sentry/fsimpl/kernfs/filesystem.go 6
-rw-r--r-- pkg/sentry/fsimpl/kernfs/synthetic_directory.go 11
-rw-r--r-- pkg/sentry/fsimpl/overlay/filesystem.go 6
-rw-r--r-- pkg/sentry/fsimpl/overlay/overlay.go 10
-rw-r--r-- pkg/sentry/fsimpl/tmpfs/directory.go 8
-rw-r--r-- pkg/sentry/fsimpl/tmpfs/regular_file.go 3
-rw-r--r-- pkg/sentry/kernel/pipe/pipe.go 8
-rw-r--r-- pkg/sentry/kernel/task_exit.go 6
-rw-r--r-- pkg/sentry/loader/elf.go 6
-rw-r--r-- pkg/sentry/platform/ptrace/filters.go 2
-rw-r--r-- pkg/sentry/socket/hostinet/socket_vfs2.go 3
-rw-r--r-- pkg/sentry/socket/netstack/netstack.go 12
-rw-r--r-- pkg/sentry/socket/netstack/netstack_vfs2.go 3
-rw-r--r-- pkg/sentry/socket/unix/BUILD 1
-rw-r--r-- pkg/sentry/socket/unix/transport/BUILD 2
-rw-r--r-- pkg/sentry/socket/unix/transport/connectioned.go 30
-rw-r--r-- pkg/sentry/socket/unix/transport/connectioned_state.go 5
-rw-r--r-- pkg/sentry/socket/unix/transport/connectionless.go 13
-rw-r--r-- pkg/sentry/socket/unix/transport/connectionless_state.go 20
-rw-r--r-- pkg/sentry/socket/unix/transport/queue.go 9
-rw-r--r-- pkg/sentry/socket/unix/transport/unix.go 77
-rw-r--r-- pkg/sentry/socket/unix/unix_vfs2.go 3
-rw-r--r-- pkg/sentry/syscalls/linux/BUILD 1
-rw-r--r-- pkg/sentry/syscalls/linux/sys_socket.go 3
-rw-r--r-- pkg/sentry/syscalls/linux/vfs2/BUILD 1
-rw-r--r-- pkg/sentry/syscalls/linux/vfs2/socket.go 3
-rw-r--r-- pkg/sentry/vfs/file_description_impl_util.go 2
-rw-r--r-- pkg/sentry/vfs/permissions.go 6
33 files changed, 221 insertions, 101 deletions
diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go
index 86d1a87f0..fd4e057d8 100644
--- a/pkg/sentry/fs/host/file.go
+++ b/pkg/sentry/fs/host/file.go
@@ -180,16 +180,9 @@ func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer
// IterateDir implements fs.DirIterator.IterateDir.
func (f *fileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
- if f.dirinfo == nil {
- f.dirinfo = new(dirInfo)
- f.dirinfo.buf = make([]byte, usermem.PageSize)
- }
- entries, err := f.iops.readdirAll(f.dirinfo)
- if err != nil {
- return offset, err
- }
- count, err := fs.GenericReaddir(dirCtx, fs.NewSortedDentryMap(entries))
- return offset + count, err
+ // We only support non-directory file descriptors that have been
+ // imported, so just claim that this isn't a directory, even if it is.
+ return offset, syscall.ENOTDIR
}
// Write implements fs.FileOperations.Write.
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go
index 2c14aa6d9..df4b265fa 100644
--- a/pkg/sentry/fs/host/inode.go
+++ b/pkg/sentry/fs/host/inode.go
@@ -411,10 +411,3 @@ func (i *inodeOperations) DropLink() {}
// NotifyStatusChange implements fs.InodeOperations.NotifyStatusChange.
func (i *inodeOperations) NotifyStatusChange(ctx context.Context) {}
-
-// readdirAll returns all of the directory entries in i.
-func (i *inodeOperations) readdirAll(d *dirInfo) (map[string]fs.DentAttr, error) {
- // We only support non-directory file descriptors that have been
- // imported, so just claim that this isn't a directory, even if it is.
- return nil, syscall.ENOTDIR
-}
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go
index 07b4fb70f..2b58fc52c 100644
--- a/pkg/sentry/fs/host/socket.go
+++ b/pkg/sentry/fs/host/socket.go
@@ -16,6 +16,7 @@ package host
import (
"fmt"
+ "sync/atomic"
"syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -206,7 +207,7 @@ func (c *ConnectedEndpoint) Send(ctx context.Context, data [][]byte, controlMess
// only as much of the message as fits in the send buffer.
truncate := c.stype == linux.SOCK_STREAM
- n, totalLen, err := fdWriteVec(c.file.FD(), data, c.sndbuf, truncate)
+ n, totalLen, err := fdWriteVec(c.file.FD(), data, c.SendMaxQueueSize(), truncate)
if n < totalLen && err == nil {
// The host only returns a short write if it would otherwise
// block (and only for stream sockets).
@@ -282,7 +283,7 @@ func (c *ConnectedEndpoint) Recv(ctx context.Context, data [][]byte, creds bool,
// N.B. Unix sockets don't have a receive buffer, the send buffer
// serves both purposes.
- rl, ml, cl, cTrunc, err := fdReadVec(c.file.FD(), data, []byte(cm), peek, c.sndbuf)
+ rl, ml, cl, cTrunc, err := fdReadVec(c.file.FD(), data, []byte(cm), peek, c.RecvMaxQueueSize())
if rl > 0 && err != nil {
// We got some data, so all we need to do on error is return
// the data that we got. Short reads are fine, no need to
@@ -363,14 +364,14 @@ func (c *ConnectedEndpoint) RecvQueuedSize() int64 {
// SendMaxQueueSize implements transport.Receiver.SendMaxQueueSize.
func (c *ConnectedEndpoint) SendMaxQueueSize() int64 {
- return int64(c.sndbuf)
+ return atomic.LoadInt64(&c.sndbuf)
}
// RecvMaxQueueSize implements transport.Receiver.RecvMaxQueueSize.
func (c *ConnectedEndpoint) RecvMaxQueueSize() int64 {
// N.B. Unix sockets don't use the receive buffer. We'll claim it is
// the same size as the send buffer.
- return int64(c.sndbuf)
+ return atomic.LoadInt64(&c.sndbuf)
}
// Release implements transport.ConnectedEndpoint.Release and transport.Receiver.Release.
@@ -381,4 +382,11 @@ func (c *ConnectedEndpoint) Release(ctx context.Context) {
// CloseUnread implements transport.ConnectedEndpoint.CloseUnread.
func (c *ConnectedEndpoint) CloseUnread() {}
+// SetSendBufferSize implements transport.ConnectedEndpoint.SetSendBufferSize.
+func (c *ConnectedEndpoint) SetSendBufferSize(v int64) (newSz int64) {
+ // gVisor does not permit setting of SO_SNDBUF for host backed unix domain
+ // sockets.
+ return atomic.LoadInt64(&c.sndbuf)
+}
+
// LINT.ThenChange(../../fsimpl/host/socket.go)
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 98f7bc52f..094d993a8 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1216,7 +1216,13 @@ func (d *dentry) checkXattrPermissions(creds *auth.Credentials, name string, ats
}
func (d *dentry) mayDelete(creds *auth.Credentials, child *dentry) error {
- return vfs.CheckDeleteSticky(creds, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&child.uid)))
+ return vfs.CheckDeleteSticky(
+ creds,
+ linux.FileMode(atomic.LoadUint32(&d.mode)),
+ auth.KUID(atomic.LoadUint32(&d.uid)),
+ auth.KUID(atomic.LoadUint32(&child.uid)),
+ auth.KGID(atomic.LoadUint32(&child.gid)),
+ )
}
func dentryUIDFromP9UID(uid p9.UID) uint32 {
diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go
index 72aa535f8..6763f5b0c 100644
--- a/pkg/sentry/fsimpl/host/socket.go
+++ b/pkg/sentry/fsimpl/host/socket.go
@@ -16,6 +16,7 @@ package host
import (
"fmt"
+ "sync/atomic"
"syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -111,7 +112,7 @@ func (c *ConnectedEndpoint) init() *syserr.Error {
}
c.stype = linux.SockType(stype)
- c.sndbuf = int64(sndbuf)
+ atomic.StoreInt64(&c.sndbuf, int64(sndbuf))
return nil
}
@@ -150,7 +151,7 @@ func (c *ConnectedEndpoint) Send(ctx context.Context, data [][]byte, controlMess
// only as much of the message as fits in the send buffer.
truncate := c.stype == linux.SOCK_STREAM
- n, totalLen, err := fdWriteVec(c.fd, data, c.sndbuf, truncate)
+ n, totalLen, err := fdWriteVec(c.fd, data, c.SendMaxQueueSize(), truncate)
if n < totalLen && err == nil {
// The host only returns a short write if it would otherwise
// block (and only for stream sockets).
@@ -226,7 +227,7 @@ func (c *ConnectedEndpoint) Recv(ctx context.Context, data [][]byte, creds bool,
// N.B. Unix sockets don't have a receive buffer, the send buffer
// serves both purposes.
- rl, ml, cl, cTrunc, err := fdReadVec(c.fd, data, []byte(cm), peek, c.sndbuf)
+ rl, ml, cl, cTrunc, err := fdReadVec(c.fd, data, []byte(cm), peek, c.RecvMaxQueueSize())
if rl > 0 && err != nil {
// We got some data, so all we need to do on error is return
// the data that we got. Short reads are fine, no need to
@@ -300,14 +301,14 @@ func (c *ConnectedEndpoint) RecvQueuedSize() int64 {
// SendMaxQueueSize implements transport.Receiver.SendMaxQueueSize.
func (c *ConnectedEndpoint) SendMaxQueueSize() int64 {
- return int64(c.sndbuf)
+ return atomic.LoadInt64(&c.sndbuf)
}
// RecvMaxQueueSize implements transport.Receiver.RecvMaxQueueSize.
func (c *ConnectedEndpoint) RecvMaxQueueSize() int64 {
// N.B. Unix sockets don't use the receive buffer. We'll claim it is
// the same size as the send buffer.
- return int64(c.sndbuf)
+ return atomic.LoadInt64(&c.sndbuf)
}
func (c *ConnectedEndpoint) destroyLocked() {
@@ -327,6 +328,13 @@ func (c *ConnectedEndpoint) Release(ctx context.Context) {
// CloseUnread implements transport.ConnectedEndpoint.CloseUnread.
func (c *ConnectedEndpoint) CloseUnread() {}
+// SetSendBufferSize implements transport.ConnectedEndpoint.SetSendBufferSize.
+func (c *ConnectedEndpoint) SetSendBufferSize(v int64) (newSz int64) {
+ // gVisor does not permit setting of SO_SNDBUF for host backed unix domain
+ // sockets.
+ return atomic.LoadInt64(&c.sndbuf)
+}
+
// SCMConnectedEndpoint represents an endpoint backed by a host fd that was
// passed through a gofer Unix socket. It resembles ConnectedEndpoint, with the
// following differences:
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index a7a553619..d6dd6bc41 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -668,6 +668,12 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
// Can we create the dst dentry?
var dst *Dentry
pc := rp.Component()
+ if pc == "." || pc == ".." {
+ if noReplace {
+ return syserror.EEXIST
+ }
+ return syserror.EBUSY
+ }
switch err := checkCreateLocked(ctx, rp.Credentials(), pc, dstDir); err {
case nil:
// Ok, continue with rename as replacement.
diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
index 463d77d79..11694c392 100644
--- a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
+++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
@@ -42,19 +42,16 @@ type syntheticDirectory struct {
var _ Inode = (*syntheticDirectory)(nil)
func newSyntheticDirectory(ctx context.Context, creds *auth.Credentials, perm linux.FileMode) Inode {
- inode := &syntheticDirectory{}
- inode.Init(ctx, creds, 0 /* devMajor */, 0 /* devMinor */, 0 /* ino */, perm)
- return inode
-}
-
-func (dir *syntheticDirectory) Init(ctx context.Context, creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) {
if perm&^linux.PermissionsMask != 0 {
panic(fmt.Sprintf("perm contains non-permission bits: %#o", perm))
}
- dir.InodeAttrs.Init(ctx, creds, devMajor, devMinor, ino, linux.S_IFDIR|perm)
+ dir := &syntheticDirectory{}
+ dir.InitRefs()
+ dir.InodeAttrs.Init(ctx, creds, 0 /* devMajor */, 0 /* devMinor */, 0 /* ino */, linux.S_IFDIR|perm)
dir.OrderedChildren.Init(OrderedChildrenOptions{
Writable: true,
})
+ return dir
}
// Open implements Inode.Open.
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index e46f593c7..b36031291 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -1068,7 +1068,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if err != nil {
return err
}
- if err := vfs.CheckDeleteSticky(creds, linux.FileMode(atomic.LoadUint32(&oldParent.mode)), auth.KUID(atomic.LoadUint32(&renamed.uid))); err != nil {
+ if err := oldParent.mayDelete(creds, renamed); err != nil {
return err
}
if renamed.isDir() {
@@ -1317,7 +1317,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
if !child.isDir() {
return syserror.ENOTDIR
}
- if err := vfs.CheckDeleteSticky(rp.Credentials(), linux.FileMode(atomic.LoadUint32(&parent.mode)), auth.KUID(atomic.LoadUint32(&child.uid))); err != nil {
+ if err := parent.mayDelete(rp.Credentials(), child); err != nil {
return err
}
child.dirMu.Lock()
@@ -1584,7 +1584,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
if child.isDir() {
return syserror.EISDIR
}
- if err := vfs.CheckDeleteSticky(rp.Credentials(), linux.FileMode(parentMode), auth.KUID(atomic.LoadUint32(&child.uid))); err != nil {
+ if err := parent.mayDelete(rp.Credentials(), child); err != nil {
return err
}
if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index 082fa6504..acd3684c6 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -760,6 +760,16 @@ func (d *dentry) updateAfterSetStatLocked(opts *vfs.SetStatOptions) {
}
}
+func (d *dentry) mayDelete(creds *auth.Credentials, child *dentry) error {
+ return vfs.CheckDeleteSticky(
+ creds,
+ linux.FileMode(atomic.LoadUint32(&d.mode)),
+ auth.KUID(atomic.LoadUint32(&d.uid)),
+ auth.KUID(atomic.LoadUint32(&child.uid)),
+ auth.KGID(atomic.LoadUint32(&child.gid)),
+ )
+}
+
// fileDescription is embedded by overlay implementations of
// vfs.FileDescriptionImpl.
//
diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go
index e90669cf0..417ac2eff 100644
--- a/pkg/sentry/fsimpl/tmpfs/directory.go
+++ b/pkg/sentry/fsimpl/tmpfs/directory.go
@@ -84,7 +84,13 @@ func (dir *directory) removeChildLocked(child *dentry) {
}
func (dir *directory) mayDelete(creds *auth.Credentials, child *dentry) error {
- return vfs.CheckDeleteSticky(creds, linux.FileMode(atomic.LoadUint32(&dir.inode.mode)), auth.KUID(atomic.LoadUint32(&child.inode.uid)))
+ return vfs.CheckDeleteSticky(
+ creds,
+ linux.FileMode(atomic.LoadUint32(&dir.inode.mode)),
+ auth.KUID(atomic.LoadUint32(&dir.inode.uid)),
+ auth.KUID(atomic.LoadUint32(&child.inode.uid)),
+ auth.KGID(atomic.LoadUint32(&child.inode.gid)),
+ )
}
// +stateify savable
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index 6255a7c84..82a743ff3 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -656,6 +656,9 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64,
// Write to that memory as usual.
seg, gap = rw.file.data.Insert(gap, gapMR, fr.Start), fsutil.FileRangeGapIterator{}
+
+ default:
+ panic("unreachable")
}
}
exitLoop:
diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go
index c551acd99..2c8668fc4 100644
--- a/pkg/sentry/kernel/pipe/pipe.go
+++ b/pkg/sentry/kernel/pipe/pipe.go
@@ -247,11 +247,15 @@ func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error)
return 0, syscall.EPIPE
}
- // POSIX requires that a write smaller than atomicIOBytes (PIPE_BUF) be
- // atomic, but requires no atomicity for writes larger than this.
avail := p.max - p.size
+ if avail == 0 {
+ return 0, syserror.ErrWouldBlock
+ }
short := false
if count > avail {
+ // POSIX requires that a write smaller than atomicIOBytes
+ // (PIPE_BUF) be atomic, but requires no atomicity for writes
+ // larger than this.
if count <= atomicIOBytes {
return 0, syserror.ErrWouldBlock
}
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index 16986244c..f7765fa3a 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -415,6 +415,12 @@ func (tg *ThreadGroup) anyNonExitingTaskLocked() *Task {
func (t *Task) reparentLocked(parent *Task) {
oldParent := t.parent
t.parent = parent
+ if oldParent != nil {
+ delete(oldParent.children, t)
+ }
+ if parent != nil {
+ parent.children[t] = struct{}{}
+ }
// If a thread group leader's parent changes, reset the thread group's
// termination signal to SIGCHLD and re-check exit notification. (Compare
// kernel/exit.c:reparent_leader().)
diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go
index 98af2cc38..cd9fa4031 100644
--- a/pkg/sentry/loader/elf.go
+++ b/pkg/sentry/loader/elf.go
@@ -517,12 +517,14 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, in
start, ok = start.AddLength(uint64(offset))
if !ok {
- panic(fmt.Sprintf("Start %#x + offset %#x overflows?", start, offset))
+ ctx.Infof(fmt.Sprintf("Start %#x + offset %#x overflows?", start, offset))
+ return loadedELF{}, syserror.EINVAL
}
end, ok = end.AddLength(uint64(offset))
if !ok {
- panic(fmt.Sprintf("End %#x + offset %#x overflows?", end, offset))
+ ctx.Infof(fmt.Sprintf("End %#x + offset %#x overflows?", end, offset))
+ return loadedELF{}, syserror.EINVAL
}
info.entry, ok = info.entry.AddLength(uint64(offset))
diff --git a/pkg/sentry/platform/ptrace/filters.go b/pkg/sentry/platform/ptrace/filters.go
index b0970e356..20fc62acb 100644
--- a/pkg/sentry/platform/ptrace/filters.go
+++ b/pkg/sentry/platform/ptrace/filters.go
@@ -17,14 +17,12 @@ package ptrace
import (
"syscall"
- "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/seccomp"
)
// SyscallFilters returns syscalls made exclusively by the ptrace platform.
func (*PTrace) SyscallFilters() seccomp.SyscallRules {
return seccomp.SyscallRules{
- unix.SYS_GETCPU: {},
syscall.SYS_PTRACE: {},
syscall.SYS_TGKILL: {},
syscall.SYS_WAIT4: {},
diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go
index f82c7c224..dc03ccb47 100644
--- a/pkg/sentry/socket/hostinet/socket_vfs2.go
+++ b/pkg/sentry/socket/hostinet/socket_vfs2.go
@@ -80,8 +80,7 @@ func newVFS2Socket(t *kernel.Task, family int, stype linux.SockType, protocol in
// Release implements vfs.FileDescriptionImpl.Release.
func (s *socketVFS2) Release(ctx context.Context) {
- t := kernel.TaskFromContext(ctx)
- t.Kernel().DeleteSocketVFS2(&s.vfsfd)
+ kernel.KernelFromContext(ctx).DeleteSocketVFS2(&s.vfsfd)
s.socketOpsCommon.Release(ctx)
}
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 69693f263..cee8120ab 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -855,10 +855,7 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
return nil, syserr.ErrInvalidArgument
}
- size, err := ep.SocketOptions().GetSendBufferSize()
- if err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
+ size := ep.SocketOptions().GetSendBufferSize()
if size > math.MaxInt32 {
size = math.MaxInt32
@@ -1647,13 +1644,6 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
return syserr.ErrInvalidArgument
}
- family, _, _ := s.Type()
- // TODO(gvisor.dev/issue/5132): We currently do not support
- // setting this option for unix sockets.
- if family == linux.AF_UNIX {
- return nil
- }
-
v := usermem.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetSendBufferSize(int64(v), true)
return nil
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index 24922c400..fc29f8f13 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -79,8 +79,7 @@ func NewVFS2(t *kernel.Task, family int, skType linux.SockType, protocol int, qu
// Release implements vfs.FileDescriptionImpl.Release.
func (s *SocketVFS2) Release(ctx context.Context) {
- t := kernel.TaskFromContext(ctx)
- t.Kernel().DeleteSocketVFS2(&s.vfsfd)
+ kernel.KernelFromContext(ctx).DeleteSocketVFS2(&s.vfsfd)
s.socketOpsCommon.Release(ctx)
}
diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD
index cce0acc33..acf2ab8e7 100644
--- a/pkg/sentry/socket/unix/BUILD
+++ b/pkg/sentry/socket/unix/BUILD
@@ -51,6 +51,7 @@ go_library(
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/fs/lock",
"//pkg/sentry/fsimpl/sockfs",
+ "//pkg/sentry/inet",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/time",
"//pkg/sentry/socket",
diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD
index 3ebbd28b0..0d11bb251 100644
--- a/pkg/sentry/socket/unix/transport/BUILD
+++ b/pkg/sentry/socket/unix/transport/BUILD
@@ -32,6 +32,7 @@ go_library(
"connectioned.go",
"connectioned_state.go",
"connectionless.go",
+ "connectionless_state.go",
"queue.go",
"queue_refs.go",
"transport_message_list.go",
@@ -45,6 +46,7 @@ go_library(
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
+ "//pkg/sentry/inet",
"//pkg/sync",
"//pkg/syserr",
"//pkg/tcpip",
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index fc5b823b0..809c95429 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -128,7 +128,9 @@ func newConnectioned(ctx context.Context, stype linux.SockType, uid UniqueIDProv
idGenerator: uid,
stype: stype,
}
- ep.ops.InitHandler(ep, nil, nil)
+
+ ep.ops.SetSendBufferSize(defaultBufferSize, false /* notify */)
+ ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits)
return ep
}
@@ -137,9 +139,9 @@ func NewPair(ctx context.Context, stype linux.SockType, uid UniqueIDProvider) (E
a := newConnectioned(ctx, stype, uid)
b := newConnectioned(ctx, stype, uid)
- q1 := &queue{ReaderQueue: a.Queue, WriterQueue: b.Queue, limit: initialLimit}
+ q1 := &queue{ReaderQueue: a.Queue, WriterQueue: b.Queue, limit: defaultBufferSize}
q1.InitRefs()
- q2 := &queue{ReaderQueue: b.Queue, WriterQueue: a.Queue, limit: initialLimit}
+ q2 := &queue{ReaderQueue: b.Queue, WriterQueue: a.Queue, limit: defaultBufferSize}
q2.InitRefs()
if stype == linux.SOCK_STREAM {
@@ -173,7 +175,8 @@ func NewExternal(ctx context.Context, stype linux.SockType, uid UniqueIDProvider
idGenerator: uid,
stype: stype,
}
- ep.ops.InitHandler(ep, nil, nil)
+ ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits)
+ ep.ops.SetSendBufferSize(connected.SendMaxQueueSize(), false /* notify */)
return ep
}
@@ -296,16 +299,18 @@ func (e *connectionedEndpoint) BidirectionalConnect(ctx context.Context, ce Conn
idGenerator: e.idGenerator,
stype: e.stype,
}
- ne.ops.InitHandler(ne, nil, nil)
+ ne.ops.InitHandler(ne, &stackHandler{}, getSendBufferLimits)
+ ne.ops.SetSendBufferSize(defaultBufferSize, false /* notify */)
- readQueue := &queue{ReaderQueue: ce.WaiterQueue(), WriterQueue: ne.Queue, limit: initialLimit}
+ readQueue := &queue{ReaderQueue: ce.WaiterQueue(), WriterQueue: ne.Queue, limit: defaultBufferSize}
readQueue.InitRefs()
ne.connected = &connectedEndpoint{
endpoint: ce,
writeQueue: readQueue,
}
- writeQueue := &queue{ReaderQueue: ne.Queue, WriterQueue: ce.WaiterQueue(), limit: initialLimit}
+ // Make sure the accepted endpoint inherits this listening socket's SO_SNDBUF.
+ writeQueue := &queue{ReaderQueue: ne.Queue, WriterQueue: ce.WaiterQueue(), limit: e.ops.GetSendBufferSize()}
writeQueue.InitRefs()
if e.stype == linux.SOCK_STREAM {
ne.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{readQueue: writeQueue}}
@@ -357,6 +362,9 @@ func (e *connectionedEndpoint) Connect(ctx context.Context, server BoundEndpoint
returnConnect := func(r Receiver, ce ConnectedEndpoint) {
e.receiver = r
e.connected = ce
+ // Make sure the newly created connected endpoint's write queue is updated
+ // to reflect this endpoint's send buffer size.
+ e.connected.SetSendBufferSize(e.ops.GetSendBufferSize())
}
return server.BidirectionalConnect(ctx, e, returnConnect)
@@ -495,3 +503,11 @@ func (e *connectionedEndpoint) State() uint32 {
}
return linux.SS_UNCONNECTED
}
+
+// OnSetSendBufferSize implements tcpip.SocketOptionsHandler.OnSetSendBufferSize.
+func (e *connectionedEndpoint) OnSetSendBufferSize(v int64) (newSz int64) {
+ if e.Connected() {
+ return e.baseEndpoint.connected.SetSendBufferSize(v)
+ }
+ return v
+}
diff --git a/pkg/sentry/socket/unix/transport/connectioned_state.go b/pkg/sentry/socket/unix/transport/connectioned_state.go
index 7e02a5db8..590b0bd01 100644
--- a/pkg/sentry/socket/unix/transport/connectioned_state.go
+++ b/pkg/sentry/socket/unix/transport/connectioned_state.go
@@ -51,3 +51,8 @@ func (e *connectionedEndpoint) loadAcceptedChan(acceptedSlice []*connectionedEnd
}
}
}
+
+// afterLoad is invoked by stateify.
+func (e *connectionedEndpoint) afterLoad() {
+ e.ops.InitHandler(e, &stackHandler{}, getSendBufferLimits)
+}
diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go
index 20fa8b874..0be78480c 100644
--- a/pkg/sentry/socket/unix/transport/connectionless.go
+++ b/pkg/sentry/socket/unix/transport/connectionless.go
@@ -41,10 +41,11 @@ var (
// NewConnectionless creates a new unbound dgram endpoint.
func NewConnectionless(ctx context.Context) Endpoint {
ep := &connectionlessEndpoint{baseEndpoint{Queue: &waiter.Queue{}}}
- q := queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: initialLimit}
+ q := queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: defaultBufferSize}
q.InitRefs()
ep.receiver = &queueReceiver{readQueue: &q}
- ep.ops.InitHandler(ep, nil, nil)
+ ep.ops.SetSendBufferSize(defaultBufferSize, false /* notify */)
+ ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits)
return ep
}
@@ -217,3 +218,11 @@ func (e *connectionlessEndpoint) State() uint32 {
return linux.SS_DISCONNECTING
}
}
+
+// OnSetSendBufferSize implements tcpip.SocketOptionsHandler.OnSetSendBufferSize.
+func (e *connectionlessEndpoint) OnSetSendBufferSize(v int64) (newSz int64) {
+ if e.Connected() {
+ return e.baseEndpoint.connected.SetSendBufferSize(v)
+ }
+ return v
+}
diff --git a/pkg/sentry/socket/unix/transport/connectionless_state.go b/pkg/sentry/socket/unix/transport/connectionless_state.go
new file mode 100644
index 000000000..2ef337ec8
--- /dev/null
+++ b/pkg/sentry/socket/unix/transport/connectionless_state.go
@@ -0,0 +1,20 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package transport
+
+// afterLoad is invoked by stateify.
+func (e *connectionlessEndpoint) afterLoad() {
+ e.ops.InitHandler(e, &stackHandler{}, getSendBufferLimits)
+}
diff --git a/pkg/sentry/socket/unix/transport/queue.go b/pkg/sentry/socket/unix/transport/queue.go
index 342def28f..698a9a82c 100644
--- a/pkg/sentry/socket/unix/transport/queue.go
+++ b/pkg/sentry/socket/unix/transport/queue.go
@@ -237,9 +237,18 @@ func (q *queue) QueuedSize() int64 {
// MaxQueueSize returns the maximum number of bytes storable in the queue.
func (q *queue) MaxQueueSize() int64 {
+ q.mu.Lock()
+ defer q.mu.Unlock()
return q.limit
}
+// SetMaxQueueSize sets the maximum number of bytes storable in the queue.
+func (q *queue) SetMaxQueueSize(v int64) {
+ q.mu.Lock()
+ defer q.mu.Unlock()
+ q.limit = v
+}
+
// CloseUnread sets flag to indicate that the peer is closed (not shutdown)
// with unread data. So if read on this queue shall return ECONNRESET error.
func (q *queue) CloseUnread() {
diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go
index 70227bbd2..ceada54a8 100644
--- a/pkg/sentry/socket/unix/transport/unix.go
+++ b/pkg/sentry/socket/unix/transport/unix.go
@@ -26,8 +26,16 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
-// initialLimit is the starting limit for the socket buffers.
-const initialLimit = 16 * 1024
+const (
+ // The minimum size of the send/receive buffers.
+ minimumBufferSize = 4 << 10 // 4 KiB (match default in linux)
+
+ // The default size of the send/receive buffers.
+ defaultBufferSize = 208 << 10 // 208 KiB (default in linux for net.core.wmem_default)
+
+ // The maximum permitted size for the send/receive buffers.
+ maxBufferSize = 4 << 20 // 4 MiB (default in linux for net.core.wmem_max)
+)
// A RightsControlMessage is a control message containing FDs.
//
@@ -627,6 +635,10 @@ type ConnectedEndpoint interface {
// CloseUnread sets the fact that this end is closed with unread data to
// the peer socket.
CloseUnread()
+
+ // SetSendBufferSize is called when the endpoint's send buffer size is
+ // changed.
+ SetSendBufferSize(v int64) (newSz int64)
}
// +stateify savable
@@ -722,6 +734,14 @@ func (e *connectedEndpoint) CloseUnread() {
e.writeQueue.CloseUnread()
}
+// SetSendBufferSize implements ConnectedEndpoint.SetSendBufferSize. It sets
+// the maximum size of the write queue to the specified value and returns
+// that value.
+func (e *connectedEndpoint) SetSendBufferSize(v int64) (newSz int64) {
+ e.writeQueue.SetMaxQueueSize(v)
+ return v
+}
+
// baseEndpoint is an embeddable unix endpoint base used in both the connected and connectionless
// unix domain socket Endpoint implementations.
//
@@ -849,27 +869,6 @@ func (e *baseEndpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error {
return nil
}
-// IsUnixSocket implements tcpip.SocketOptionsHandler.IsUnixSocket.
-func (e *baseEndpoint) IsUnixSocket() bool {
- return true
-}
-
-// GetSendBufferSize implements tcpip.SocketOptionsHandler.GetSendBufferSize.
-func (e *baseEndpoint) GetSendBufferSize() (int64, tcpip.Error) {
- e.Lock()
- defer e.Unlock()
-
- if !e.Connected() {
- return -1, &tcpip.ErrNotConnected{}
- }
-
- v := e.connected.SendMaxQueueSize()
- if v < 0 {
- return -1, &tcpip.ErrQueueSizeNotSupported{}
- }
- return v, nil
-}
-
func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) {
switch opt {
case tcpip.ReceiveQueueSizeOption:
@@ -987,3 +986,35 @@ func (e *baseEndpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) {
func (*baseEndpoint) Release(context.Context) {
// Binding a baseEndpoint doesn't take a reference.
}
+
+// stackHandler is a stub implementation of tcpip.StackHandler that is passed
+// to InitHandler when initializing socket options.
+type stackHandler struct {
+}
+
+// Option implements tcpip.StackHandler.
+func (h *stackHandler) Option(option interface{}) tcpip.Error {
+ panic("unimplemented")
+}
+
+// TransportProtocolOption implements tcpip.StackHandler.
+func (h *stackHandler) TransportProtocolOption(proto tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) tcpip.Error {
+ panic("unimplemented")
+}
+
+// getSendBufferLimits implements tcpip.GetSendBufferLimits.
+//
+// AF_UNIX socket buffer sizes are not tied to the networking stack/namespace
+// in linux but are bound by net.core.(wmem|rmem)_(max|default).
+//
+// In gVisor, net.core sysctls are currently either not exposed or, if exposed,
+// tied to the networking stack in use. This complicates AF_UNIX sockets in a
+// new namespace with no networking stack. As a result, for now we define the
+// min/default/max values here in the unix socket implementation itself.
+func getSendBufferLimits(tcpip.StackHandler) tcpip.SendBufferSizeOption {
+ return tcpip.SendBufferSizeOption{
+ Min: minimumBufferSize,
+ Default: defaultBufferSize,
+ Max: maxBufferSize,
+ }
+}
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index a7d4d7f1f..9c037cbae 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -95,8 +95,7 @@ func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint3
// DecRef implements RefCounter.DecRef.
func (s *SocketVFS2) DecRef(ctx context.Context) {
s.socketVFS2Refs.DecRef(func() {
- t := kernel.TaskFromContext(ctx)
- t.Kernel().DeleteSocketVFS2(&s.vfsfd)
+ kernel.KernelFromContext(ctx).DeleteSocketVFS2(&s.vfsfd)
s.ep.Close(ctx)
if s.abstractNamespace != nil {
s.abstractNamespace.Remove(s.abstractName, s)
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index a2e441448..4188502dc 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -62,7 +62,6 @@ go_library(
deps = [
"//pkg/abi",
"//pkg/abi/linux",
- "//pkg/binary",
"//pkg/bpf",
"//pkg/context",
"//pkg/log",
diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go
index fe45225c1..686392cc8 100644
--- a/pkg/sentry/syscalls/linux/sys_socket.go
+++ b/pkg/sentry/syscalls/linux/sys_socket.go
@@ -18,7 +18,6 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -457,7 +456,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
return 0, nil, e.ToError()
}
- vLen := int32(binary.Size(v))
+ vLen := int32(v.SizeBytes())
if _, err := primitive.CopyInt32Out(t, optLenAddr, vLen); err != nil {
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD
index 9ee766552..2e59bd5b1 100644
--- a/pkg/sentry/syscalls/linux/vfs2/BUILD
+++ b/pkg/sentry/syscalls/linux/vfs2/BUILD
@@ -39,7 +39,6 @@ go_library(
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
- "//pkg/binary",
"//pkg/bits",
"//pkg/context",
"//pkg/fspath",
diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go
index f5795b4a8..7636ca453 100644
--- a/pkg/sentry/syscalls/linux/vfs2/socket.go
+++ b/pkg/sentry/syscalls/linux/vfs2/socket.go
@@ -18,7 +18,6 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -460,7 +459,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
return 0, nil, e.ToError()
}
- vLen := int32(binary.Size(v))
+ vLen := int32(v.SizeBytes())
if _, err := primitive.CopyInt32Out(t, optLenAddr, vLen); err != nil {
return 0, nil, err
}
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index eb7d2fd3b..d2050b3f7 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -238,6 +238,8 @@ func (s *StaticData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// WritableDynamicBytesSource extends DynamicBytesSource to allow writes to the
// underlying source.
+//
+// TODO(b/179825241): Make utility for integer-based writable files.
type WritableDynamicBytesSource interface {
DynamicBytesSource
diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go
index d48520d58..db6146fd2 100644
--- a/pkg/sentry/vfs/permissions.go
+++ b/pkg/sentry/vfs/permissions.go
@@ -243,11 +243,13 @@ func CheckSetStat(ctx context.Context, creds *auth.Credentials, opts *SetStatOpt
// the given file mode, and if so, checks whether creds has permission to
// remove a file owned by childKUID from a directory with the given mode.
// CheckDeleteSticky is consistent with fs/linux.h:check_sticky().
-func CheckDeleteSticky(creds *auth.Credentials, parentMode linux.FileMode, childKUID auth.KUID) error {
+func CheckDeleteSticky(creds *auth.Credentials, parentMode linux.FileMode, parentKUID auth.KUID, childKUID auth.KUID, childKGID auth.KGID) error {
if parentMode&linux.ModeSticky == 0 {
return nil
}
- if CanActAsOwner(creds, childKUID) {
+ if creds.EffectiveKUID == childKUID ||
+ creds.EffectiveKUID == parentKUID ||
+ HasCapabilityOnFile(creds, linux.CAP_FOWNER, childKUID, childKGID) {
return nil
}
return syserror.EPERM